keepalive: be more careful about self._rbuf when calling super impls...
Augie Fackler
r39841:1cf1680b default
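This change fixes a data-duplication hazard in HTTPResponse.read(). The old code did s = self._rbuf + self._raw_read(amt). On Python 3, http.client.HTTPResponse.read() is implemented on top of readinto(), which this class overrides to drain self._rbuf first, so buffered bytes could end up in the result twice. The fix takes and clears self._rbuf before calling the super implementation. A minimal sketch of the hazard, using illustrative stand-in classes rather than the real ones:

    class Base(object):
        # stands in for http.client.HTTPResponse: on Python 3 its
        # read() is built on top of readinto()
        def read(self, n):
            buf = bytearray(n)
            got = self.readinto(buf)
            return bytes(buf[:got])

    class Buffered(Base):
        # stands in for the HTTPResponse subclass below, whose
        # readinto() drains an internal buffer first (simplified:
        # no underlying socket read)
        def __init__(self):
            self._rbuf = b'AB'
        def readinto(self, dest):
            n = len(self._rbuf)
            dest[0:n] = self._rbuf
            self._rbuf = b''
            return n

    r = Buffered()
    # old pattern: _rbuf is prepended, then the super read() dispatches
    # back into the overridden readinto() and sees the same bytes again
    assert r._rbuf + Base.read(r, 2) == b'ABAB'   # doubled!

    r = Buffered()
    # fixed pattern: take and clear _rbuf *before* the super call
    s, r._rbuf = r._rbuf, b''
    assert s + Base.read(r, 2) == b'AB'

Clearing the buffer before delegating means the superclass never sees already-buffered bytes, whichever of read()/readinto() it is built on.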
@@ -1,753 +1,756 @@
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see
# <http://www.gnu.org/licenses/>.

# This file is part of urlgrabber, a high-level cross-protocol url-grabber
# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko

# Modified by Benoit Boissinot:
#  - fix for digest auth (inspired from urllib2.py @ Python v2.4)
# Modified by Dirkjan Ochtman:
#  - import md5 function from a local util module
# Modified by Augie Fackler:
#  - add safesend method and use it to prevent broken pipe errors
#    on large POST requests

"""An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.

>>> import urllib2
>>> from keepalive import HTTPHandler
>>> keepalive_handler = HTTPHandler()
>>> opener = urlreq.buildopener(keepalive_handler)
>>> urlreq.installopener(opener)
>>>
>>> fo = urlreq.urlopen('http://www.python.org')

If a connection to a given host is requested, and all of the existing
connections are still in use, another connection will be opened.  If
the handler tries to use an existing connection but it fails in some
way, it will be closed and removed from the pool.

To remove the handler, simply re-run build_opener with no arguments, and
install that opener.

You can explicitly close connections by using the close_connection()
method of the returned file-like object (described below) or you can
use the handler methods:

  close_connection(host)
  close_all()
  open_connections()

NOTE: using the close_connection and close_all methods of the handler
should be done with care when using multiple threads.
  * there is nothing that prevents another thread from creating new
    connections immediately after connections are closed
  * no checks are done to prevent in-use connections from being closed

>>> keepalive_handler.close_all()

EXTRA ATTRIBUTES AND METHODS

  Upon a status of 200, the object returned has a few additional
  attributes and methods, which should not be used if you want to
  remain consistent with the normal urllib2-returned objects:

    close_connection()  -  close the connection to the host
    readlines()         -  you know, readlines()
    status              -  the return status (i.e. 404)
    reason              -  english translation of status (i.e. 'File not found')

  If you want the best of both worlds, use this inside an
  AttributeError-catching try:

  >>> try: status = fo.status
  >>> except AttributeError: status = None

  Unfortunately, these are ONLY there if status == 200, so it's not
  easy to distinguish between non-200 responses.  The reason is that
  urllib2 tries to do clever things with error codes 301, 302, 401,
  and 407, and it wraps the object upon return.
"""

# $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $

from __future__ import absolute_import, print_function

import errno
import hashlib
import socket
import sys
import threading

from .i18n import _
from . import (
    node,
    pycompat,
    urllibcompat,
    util,
)
from .utils import (
    procutil,
)

httplib = util.httplib
urlerr = util.urlerr
urlreq = util.urlreq

DEBUG = None

class ConnectionManager(object):
    """
    The connection manager must be able to:
      * keep track of all existing
    """
    def __init__(self):
        self._lock = threading.Lock()
        self._hostmap = {} # map hosts to a list of connections
        self._connmap = {} # map connections to host
        self._readymap = {} # map connection to ready state

    def add(self, host, connection, ready):
        self._lock.acquire()
        try:
            if host not in self._hostmap:
                self._hostmap[host] = []
            self._hostmap[host].append(connection)
            self._connmap[connection] = host
            self._readymap[connection] = ready
        finally:
            self._lock.release()

    def remove(self, connection):
        self._lock.acquire()
        try:
            try:
                host = self._connmap[connection]
            except KeyError:
                pass
            else:
                del self._connmap[connection]
                del self._readymap[connection]
                self._hostmap[host].remove(connection)
                if not self._hostmap[host]:
                    del self._hostmap[host]
        finally:
            self._lock.release()

    def set_ready(self, connection, ready):
        try:
            self._readymap[connection] = ready
        except KeyError:
            pass

    def get_ready_conn(self, host):
        conn = None
        self._lock.acquire()
        try:
            if host in self._hostmap:
                for c in self._hostmap[host]:
                    if self._readymap[c]:
                        self._readymap[c] = 0
                        conn = c
                        break
        finally:
            self._lock.release()
        return conn

    def get_all(self, host=None):
        if host:
            return list(self._hostmap.get(host, []))
        else:
            return dict(self._hostmap)

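# Illustrative ConnectionManager usage (assumed names, not from the
# original module); get_ready_conn() marks the returned connection busy
# until set_ready() hands it back:
#
#     cm = ConnectionManager()
#     cm.add('example.com:80', conn, ready=1)
#     c = cm.get_ready_conn('example.com:80')
#     cm.set_ready(c, 1)   # return it to the pool
#     cm.remove(c)         # or drop it entirely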
class KeepAliveHandler(object):
    def __init__(self):
        self._cm = ConnectionManager()

    #### Connection Management
    def open_connections(self):
        """return a list of connected hosts and the number of connections
        to each.  [('foo.com:80', 2), ('bar.org', 1)]"""
        return [(host, len(li)) for (host, li) in self._cm.get_all().items()]

    def close_connection(self, host):
        """close connection(s) to <host>
        host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
        no error occurs if there is no connection to that host."""
        for h in self._cm.get_all(host):
            self._cm.remove(h)
            h.close()

    def close_all(self):
        """close all open connections"""
        for host, conns in self._cm.get_all().iteritems():
            for h in conns:
                self._cm.remove(h)
                h.close()

    def _request_closed(self, request, host, connection):
        """tells us that this request is now closed and that the
        connection is ready for another request"""
        self._cm.set_ready(connection, 1)

    def _remove_connection(self, host, connection, close=0):
        if close:
            connection.close()
        self._cm.remove(connection)

    #### Transaction Execution
    def http_open(self, req):
        return self.do_open(HTTPConnection, req)

    def do_open(self, http_class, req):
        host = urllibcompat.gethost(req)
        if not host:
            raise urlerr.urlerror('no host given')

        try:
            h = self._cm.get_ready_conn(host)
            while h:
                r = self._reuse_connection(h, req, host)

                # if this response is non-None, then it worked and we're
                # done.  Break out, skipping the else block.
                if r:
                    break

                # connection is bad - possibly closed by server
                # discard it and ask for the next free connection
                h.close()
                self._cm.remove(h)
                h = self._cm.get_ready_conn(host)
            else:
                # no (working) free connections were found.  Create a new one.
                h = http_class(host)
                if DEBUG:
                    DEBUG.info("creating new connection to %s (%d)",
                               host, id(h))
                self._cm.add(host, h, 0)
                self._start_transaction(h, req)
                r = h.getresponse()
        # The string form of BadStatusLine is the status line. Add some context
        # to make the error message slightly more useful.
        except httplib.BadStatusLine as err:
            raise urlerr.urlerror(
                _('bad HTTP status line: %s') % pycompat.sysbytes(err.line))
        except (socket.error, httplib.HTTPException) as err:
            raise urlerr.urlerror(err)

        # If not a persistent connection, don't try to reuse it. Look
        # for this using getattr() since vcr doesn't define this
        # attribute, and in that case always close the connection.
        if getattr(r, r'will_close', True):
            self._cm.remove(h)

        if DEBUG:
            DEBUG.info("STATUS: %s, %s", r.status, r.reason)
        r._handler = self
        r._host = host
        r._url = req.get_full_url()
        r._connection = h
        r.code = r.status
        r.headers = r.msg
        r.msg = r.reason

        return r

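    # Note on the while/else in do_open() above: the else clause runs
    # only when the loop ends without break, i.e. when get_ready_conn()
    # returned None because no pooled connection produced a response;
    # only then is a brand new connection created.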
    def _reuse_connection(self, h, req, host):
        """start the transaction with a re-used connection
        return a response object (r) upon success or None on failure.
        This DOES not close or remove bad connections in cases where
        it returns.  However, if an unexpected exception occurs, it
        will close and remove the connection before re-raising.
        """
        try:
            self._start_transaction(h, req)
            r = h.getresponse()
            # note: just because we got something back doesn't mean it
            # worked.  We'll check the version below, too.
        except (socket.error, httplib.HTTPException):
            r = None
        except: # re-raises
            # adding this block just in case we've missed
            # something we will still raise the exception, but
            # lets try and close the connection and remove it
            # first.  We previously got into a nasty loop
            # where an exception was uncaught, and so the
            # connection stayed open.  On the next try, the
            # same exception was raised, etc.  The trade-off is
            # that it's now possible this call will raise
            # a DIFFERENT exception
            if DEBUG:
                DEBUG.error("unexpected exception - closing "
                            "connection to %s (%d)", host, id(h))
            self._cm.remove(h)
            h.close()
            raise

        if r is None or r.version == 9:
            # httplib falls back to assuming HTTP 0.9 if it gets a
            # bad header back.  This is most likely to happen if
            # the socket has been closed by the server since we
            # last used the connection.
            if DEBUG:
                DEBUG.info("failed to re-use connection to %s (%d)",
                           host, id(h))
            r = None
        else:
            if DEBUG:
                DEBUG.info("re-using connection to %s (%d)", host, id(h))

        return r

    def _start_transaction(self, h, req):
        # What follows mostly reimplements HTTPConnection.request()
        # except it adds self.parent.addheaders in the mix and sends headers
        # in a deterministic order (to make testing easier).
        headers = util.sortdict(self.parent.addheaders)
        headers.update(sorted(req.headers.items()))
        headers.update(sorted(req.unredirected_hdrs.items()))
        headers = util.sortdict((n.lower(), v) for n, v in headers.items())
        skipheaders = {}
        for n in (r'host', r'accept-encoding'):
            if n in headers:
                skipheaders[r'skip_' + n.replace(r'-', r'_')] = 1
        try:
            if urllibcompat.hasdata(req):
                data = urllibcompat.getdata(req)
                h.putrequest(
                    req.get_method(), urllibcompat.getselector(req),
                    **skipheaders)
                if r'content-type' not in headers:
                    h.putheader(r'Content-type',
                                r'application/x-www-form-urlencoded')
                if r'content-length' not in headers:
                    h.putheader(r'Content-length', r'%d' % len(data))
            else:
                h.putrequest(
                    req.get_method(), urllibcompat.getselector(req),
                    **skipheaders)
        except socket.error as err:
            raise urlerr.urlerror(err)
        for k, v in headers.items():
            h.putheader(k, v)
        h.endheaders()
        if urllibcompat.hasdata(req):
            h.send(data)

class HTTPHandler(KeepAliveHandler, urlreq.httphandler):
    pass

class HTTPResponse(httplib.HTTPResponse):
    # we need to subclass HTTPResponse in order to
    # 1) add readline(), readlines(), and readinto() methods
    # 2) add close_connection() methods
    # 3) add info() and geturl() methods

    # in order to add readline(), read must be modified to deal with a
    # buffer.  example: readline must read a buffer and then spit back
    # one line at a time.  The only real alternative is to read one
    # BYTE at a time (ick).  Once something has been read, it can't be
    # put back (ok, maybe it can, but that's even uglier than this),
    # so if you THEN do a normal read, you must first take stuff from
    # the buffer.

    # the read method wraps the original to accommodate buffering,
    # although read() never adds to the buffer.
    # Both readline and readlines have been stolen with almost no
    # modification from socket.py


    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        extrakw = {}
        if not pycompat.ispy3:
            extrakw[r'strict'] = True
            extrakw[r'buffering'] = True
        httplib.HTTPResponse.__init__(self, sock, debuglevel=debuglevel,
                                      method=method, **extrakw)
        self.fileno = sock.fileno
        self.code = None
        self._rbuf = ''
        self._rbufsize = 8096
        self._handler = None # inserted by the handler later
        self._host = None # (same)
        self._url = None # (same)
        self._connection = None # (same)

    _raw_read = httplib.HTTPResponse.read
    _raw_readinto = getattr(httplib.HTTPResponse, 'readinto', None)

    def close(self):
        if self.fp:
            self.fp.close()
            self.fp = None
            if self._handler:
                self._handler._request_closed(self, self._host,
                                              self._connection)

    def close_connection(self):
        self._handler._remove_connection(self._host, self._connection, close=1)
        self.close()

    def info(self):
        return self.headers

    def geturl(self):
        return self._url

    def read(self, amt=None):
        # the _rbuf test is only in this first if for speed.  It's not
        # logically necessary
        if self._rbuf and amt is not None:
            L = len(self._rbuf)
            if amt > L:
                amt -= L
            else:
                s = self._rbuf[:amt]
                self._rbuf = self._rbuf[amt:]
                return s

        # Careful! http.client.HTTPResponse.read() on Python 3 is
        # implemented using readinto(), which can duplicate self._rbuf
        # if it's not empty.
        s = self._rbuf
        self._rbuf = ''
        s += self._raw_read(amt)
        return s

    # stolen from Python SVN #68532 to fix issue1088
    def _read_chunked(self, amt):
        chunk_left = self.chunk_left
        parts = []

        while True:
            if chunk_left is None:
                line = self.fp.readline()
                i = line.find(';')
                if i >= 0:
                    line = line[:i] # strip chunk-extensions
                try:
                    chunk_left = int(line, 16)
                except ValueError:
                    # close the connection as protocol synchronization is
                    # probably lost
                    self.close()
                    raise httplib.IncompleteRead(''.join(parts))
                if chunk_left == 0:
                    break
            if amt is None:
                parts.append(self._safe_read(chunk_left))
            elif amt < chunk_left:
                parts.append(self._safe_read(amt))
                self.chunk_left = chunk_left - amt
                return ''.join(parts)
            elif amt == chunk_left:
                parts.append(self._safe_read(amt))
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return ''.join(parts)
            else:
                parts.append(self._safe_read(chunk_left))
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)  # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline()
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line == '\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return ''.join(parts)

    def readline(self):
        # Fast path: a line is already available in the read buffer.
        i = self._rbuf.find('\n')
        if i >= 0:
            i += 1
            line = self._rbuf[:i]
            self._rbuf = self._rbuf[i:]
            return line

        # No newline in local buffer. Read until we find one.
        chunks = [self._rbuf]
        i = -1
        readsize = self._rbufsize
        while True:
            new = self._raw_read(readsize)
            if not new:
                break

            chunks.append(new)
            i = new.find('\n')
            if i >= 0:
                break

        # We either have exhausted the stream or have a newline in chunks[-1].

        # EOF
        if i == -1:
            self._rbuf = ''
            return ''.join(chunks)

        i += 1
        self._rbuf = chunks[-1][i:]
        chunks[-1] = chunks[-1][:i]
        return ''.join(chunks)

    def readlines(self, sizehint=0):
        total = 0
        list = []
        while True:
            line = self.readline()
            if not line:
                break
            list.append(line)
            total += len(line)
            if sizehint and total >= sizehint:
                break
        return list

    def readinto(self, dest):
        if self._raw_readinto is None:
            res = self.read(len(dest))
            if not res:
                return 0
            dest[0:len(res)] = res
            return len(res)
        total = len(dest)
        have = len(self._rbuf)
        if have >= total:
            dest[0:total] = self._rbuf[:total]
            self._rbuf = self._rbuf[total:]
            return total
        mv = memoryview(dest)
        got = self._raw_readinto(mv[have:total])
        dest[0:have] = self._rbuf
        got += len(self._rbuf)
        self._rbuf = ''
        return got

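# Worked example for readinto() above (assumed values): with
# _rbuf == b'AB' and a 5-byte dest, the superclass readinto() fills
# dest[2:5] through the memoryview slice, the two buffered bytes are
# then copied into dest[0:2], and got + 2 is returned.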
def safesend(self, str):
    """Send `str' to the server.

    Shamelessly ripped off from httplib to patch a bad behavior.
    """
    # _broken_pipe_resp is an attribute we set in this function
    # if the socket is closed while we're sending data but
    # the server sent us a response before hanging up.
    # In that case, we want to pretend to send the rest of the
    # outgoing data, and then let the user use getresponse()
    # (which we wrap) to get this last response before
    # opening a new socket.
    if getattr(self, '_broken_pipe_resp', None) is not None:
        return

    if self.sock is None:
        if self.auto_open:
            self.connect()
        else:
            raise httplib.NotConnected

    # send the data to the server. if we get a broken pipe, then close
    # the socket. we want to reconnect when somebody tries to send again.
    #
    # NOTE: we DO propagate the error, though, because we cannot simply
    # ignore the error... the caller will know if they can retry.
    if self.debuglevel > 0:
        print("send:", repr(str))
    try:
        blocksize = 8192
        read = getattr(str, 'read', None)
        if read is not None:
            if self.debuglevel > 0:
                print("sending a read()able")
            data = read(blocksize)
            while data:
                self.sock.sendall(data)
                data = read(blocksize)
        else:
            self.sock.sendall(str)
    except socket.error as v:
        reraise = True
        if v[0] == errno.EPIPE: # Broken pipe
            if self._HTTPConnection__state == httplib._CS_REQ_SENT:
                self._broken_pipe_resp = None
                self._broken_pipe_resp = self.getresponse()
                reraise = False
            self.close()
        if reraise:
            raise

def wrapgetresponse(cls):
    """Wraps getresponse in cls with a broken-pipe sane version.
    """
    def safegetresponse(self):
        # In safesend() we might set the _broken_pipe_resp
        # attribute, in which case the socket has already
        # been closed and we just need to give them the response
        # back. Otherwise, we use the normal response path.
        r = getattr(self, '_broken_pipe_resp', None)
        if r is not None:
            return r
        return cls.getresponse(self)
    safegetresponse.__doc__ = cls.getresponse.__doc__
    return safegetresponse

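# How the pieces combine: the HTTPConnection subclass below swaps
# safesend() in for send() and wraps getresponse(), so a server that
# hangs up mid-request body but still sends a response surfaces that
# response on the next getresponse() call instead of a bare EPIPE.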
class HTTPConnection(httplib.HTTPConnection):
    # use the modified response class
    response_class = HTTPResponse
    send = safesend
    getresponse = wrapgetresponse(httplib.HTTPConnection)


#########################################################################
#####   TEST FUNCTIONS
#########################################################################


def continuity(url):
    md5 = hashlib.md5
    format = '%25s: %s'

    # first fetch the file with the normal http handler
    opener = urlreq.buildopener()
    urlreq.installopener(opener)
    fo = urlreq.urlopen(url)
    foo = fo.read()
    fo.close()
    m = md5(foo)
    print(format % ('normal urllib', node.hex(m.digest())))

    # now install the keepalive handler and try again
    opener = urlreq.buildopener(HTTPHandler())
    urlreq.installopener(opener)

    fo = urlreq.urlopen(url)
    foo = fo.read()
    fo.close()
    m = md5(foo)
    print(format % ('keepalive read', node.hex(m.digest())))

    fo = urlreq.urlopen(url)
    foo = ''
    while True:
        f = fo.readline()
        if f:
            foo = foo + f
        else:
            break
    fo.close()
    m = md5(foo)
    print(format % ('keepalive readline', node.hex(m.digest())))

def comp(N, url):
    print('  making %i connections to:\n  %s' % (N, url))

    procutil.stdout.write('  first using the normal urllib handlers')
    # first use normal opener
    opener = urlreq.buildopener()
    urlreq.installopener(opener)
    t1 = fetch(N, url)
    print('  TIME: %.3f s' % t1)

    procutil.stdout.write('  now using the keepalive handler       ')
    # now install the keepalive handler and try again
    opener = urlreq.buildopener(HTTPHandler())
    urlreq.installopener(opener)
    t2 = fetch(N, url)
    print('  TIME: %.3f s' % t2)
    print('  improvement factor: %.2f' % (t1 / t2))

def fetch(N, url, delay=0):
    import time
    lens = []
    starttime = time.time()
    for i in range(N):
        if delay and i > 0:
            time.sleep(delay)
        fo = urlreq.urlopen(url)
        foo = fo.read()
        fo.close()
        lens.append(len(foo))
    diff = time.time() - starttime

    j = 0
    for i in lens[1:]:
        j = j + 1
        if not i == lens[0]:
            print("WARNING: inconsistent length on read %i: %i" % (j, i))

    return diff

def test_timeout(url):
    global DEBUG
    dbbackup = DEBUG
    class FakeLogger(object):
        def debug(self, msg, *args):
            print(msg % args)
        info = warning = error = debug
    DEBUG = FakeLogger()
    print("  fetching the file to establish a connection")
    fo = urlreq.urlopen(url)
    data1 = fo.read()
    fo.close()

    i = 20
    print("  waiting %i seconds for the server to close the connection" % i)
    while i > 0:
        procutil.stdout.write('\r %2i' % i)
        procutil.stdout.flush()
        time.sleep(1)
        i -= 1
    procutil.stderr.write('\r')

    print("  fetching the file a second time")
    fo = urlreq.urlopen(url)
    data2 = fo.read()
    fo.close()

    if data1 == data2:
        print('  data are identical')
    else:
        print('  ERROR: DATA DIFFER')

    DEBUG = dbbackup


def test(url, N=10):
    print("performing continuity test (making sure stuff isn't corrupted)")
    continuity(url)
    print('')
    print("performing speed comparison")
    comp(N, url)
    print('')
    print("performing dropped-connection check")
    test_timeout(url)

if __name__ == '__main__':
    import time
    try:
        N = int(sys.argv[1])
        url = sys.argv[2]
    except (IndexError, ValueError):
        print("%s <integer> <url>" % sys.argv[0])
    else:
        test(url, N)