##// END OF EJS Templates
keepalive: switch from thread to threading module...
Pulkit Goyal -
r29456:e61d384e default
parent child Browse files
Show More
@@ -1,759 +1,759 b''
1 # This library is free software; you can redistribute it and/or
1 # This library is free software; you can redistribute it and/or
2 # modify it under the terms of the GNU Lesser General Public
2 # modify it under the terms of the GNU Lesser General Public
3 # License as published by the Free Software Foundation; either
3 # License as published by the Free Software Foundation; either
4 # version 2.1 of the License, or (at your option) any later version.
4 # version 2.1 of the License, or (at your option) any later version.
5 #
5 #
6 # This library is distributed in the hope that it will be useful,
6 # This library is distributed in the hope that it will be useful,
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
7 # but WITHOUT ANY WARRANTY; without even the implied warranty of
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
8 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
9 # Lesser General Public License for more details.
9 # Lesser General Public License for more details.
10 #
10 #
11 # You should have received a copy of the GNU Lesser General Public
11 # You should have received a copy of the GNU Lesser General Public
12 # License along with this library; if not, see
12 # License along with this library; if not, see
13 # <http://www.gnu.org/licenses/>.
13 # <http://www.gnu.org/licenses/>.
14
14
15 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
15 # This file is part of urlgrabber, a high-level cross-protocol url-grabber
16 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
16 # Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
17
17
18 # Modified by Benoit Boissinot:
18 # Modified by Benoit Boissinot:
19 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
19 # - fix for digest auth (inspired from urllib2.py @ Python v2.4)
20 # Modified by Dirkjan Ochtman:
20 # Modified by Dirkjan Ochtman:
21 # - import md5 function from a local util module
21 # - import md5 function from a local util module
22 # Modified by Augie Fackler:
22 # Modified by Augie Fackler:
23 # - add safesend method and use it to prevent broken pipe errors
23 # - add safesend method and use it to prevent broken pipe errors
24 # on large POST requests
24 # on large POST requests
25
25
26 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
26 """An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
27
27
28 >>> import urllib2
28 >>> import urllib2
29 >>> from keepalive import HTTPHandler
29 >>> from keepalive import HTTPHandler
30 >>> keepalive_handler = HTTPHandler()
30 >>> keepalive_handler = HTTPHandler()
31 >>> opener = urlreq.buildopener(keepalive_handler)
31 >>> opener = urlreq.buildopener(keepalive_handler)
32 >>> urlreq.installopener(opener)
32 >>> urlreq.installopener(opener)
33 >>>
33 >>>
34 >>> fo = urlreq.urlopen('http://www.python.org')
34 >>> fo = urlreq.urlopen('http://www.python.org')
35
35
36 If a connection to a given host is requested, and all of the existing
36 If a connection to a given host is requested, and all of the existing
37 connections are still in use, another connection will be opened. If
37 connections are still in use, another connection will be opened. If
38 the handler tries to use an existing connection but it fails in some
38 the handler tries to use an existing connection but it fails in some
39 way, it will be closed and removed from the pool.
39 way, it will be closed and removed from the pool.
40
40
41 To remove the handler, simply re-run build_opener with no arguments, and
41 To remove the handler, simply re-run build_opener with no arguments, and
42 install that opener.
42 install that opener.
43
43
44 You can explicitly close connections by using the close_connection()
44 You can explicitly close connections by using the close_connection()
45 method of the returned file-like object (described below) or you can
45 method of the returned file-like object (described below) or you can
46 use the handler methods:
46 use the handler methods:
47
47
48 close_connection(host)
48 close_connection(host)
49 close_all()
49 close_all()
50 open_connections()
50 open_connections()
51
51
52 NOTE: using the close_connection and close_all methods of the handler
52 NOTE: using the close_connection and close_all methods of the handler
53 should be done with care when using multiple threads.
53 should be done with care when using multiple threads.
54 * there is nothing that prevents another thread from creating new
54 * there is nothing that prevents another thread from creating new
55 connections immediately after connections are closed
55 connections immediately after connections are closed
56 * no checks are done to prevent in-use connections from being closed
56 * no checks are done to prevent in-use connections from being closed
57
57
58 >>> keepalive_handler.close_all()
58 >>> keepalive_handler.close_all()
59
59
60 EXTRA ATTRIBUTES AND METHODS
60 EXTRA ATTRIBUTES AND METHODS
61
61
62 Upon a status of 200, the object returned has a few additional
62 Upon a status of 200, the object returned has a few additional
63 attributes and methods, which should not be used if you want to
63 attributes and methods, which should not be used if you want to
64 remain consistent with the normal urllib2-returned objects:
64 remain consistent with the normal urllib2-returned objects:
65
65
66 close_connection() - close the connection to the host
66 close_connection() - close the connection to the host
67 readlines() - you know, readlines()
67 readlines() - you know, readlines()
68 status - the return status (i.e. 404)
68 status - the return status (i.e. 404)
69 reason - english translation of status (i.e. 'File not found')
69 reason - english translation of status (i.e. 'File not found')
70
70
71 If you want the best of both worlds, use this inside an
71 If you want the best of both worlds, use this inside an
72 AttributeError-catching try:
72 AttributeError-catching try:
73
73
74 >>> try: status = fo.status
74 >>> try: status = fo.status
75 >>> except AttributeError: status = None
75 >>> except AttributeError: status = None
76
76
77 Unfortunately, these are ONLY there if status == 200, so it's not
77 Unfortunately, these are ONLY there if status == 200, so it's not
78 easy to distinguish between non-200 responses. The reason is that
78 easy to distinguish between non-200 responses. The reason is that
79 urllib2 tries to do clever things with error codes 301, 302, 401,
79 urllib2 tries to do clever things with error codes 301, 302, 401,
80 and 407, and it wraps the object upon return.
80 and 407, and it wraps the object upon return.
81
81
82 For python versions earlier than 2.4, you can avoid this fancy error
82 For python versions earlier than 2.4, you can avoid this fancy error
83 handling by setting the module-level global HANDLE_ERRORS to zero.
83 handling by setting the module-level global HANDLE_ERRORS to zero.
84 You see, prior to 2.4, it's the HTTP Handler's job to determine what
84 You see, prior to 2.4, it's the HTTP Handler's job to determine what
85 to handle specially, and what to just pass up. HANDLE_ERRORS == 0
85 to handle specially, and what to just pass up. HANDLE_ERRORS == 0
86 means "pass everything up". In python 2.4, however, this job no
86 means "pass everything up". In python 2.4, however, this job no
87 longer belongs to the HTTP Handler and is now done by a NEW handler,
87 longer belongs to the HTTP Handler and is now done by a NEW handler,
88 HTTPErrorProcessor. Here's the bottom line:
88 HTTPErrorProcessor. Here's the bottom line:
89
89
90 python version < 2.4
90 python version < 2.4
91 HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
91 HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
92 errors
92 errors
93 HANDLE_ERRORS == 0 pass everything up, error processing is
93 HANDLE_ERRORS == 0 pass everything up, error processing is
94 left to the calling code
94 left to the calling code
95 python version >= 2.4
95 python version >= 2.4
96 HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
96 HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
97 HANDLE_ERRORS == 0 (default) pass everything up, let the
97 HANDLE_ERRORS == 0 (default) pass everything up, let the
98 other handlers (specifically,
98 other handlers (specifically,
99 HTTPErrorProcessor) decide what to do
99 HTTPErrorProcessor) decide what to do
100
100
101 In practice, setting the variable either way makes little difference
101 In practice, setting the variable either way makes little difference
102 in python 2.4, so for the most consistent behavior across versions,
102 in python 2.4, so for the most consistent behavior across versions,
103 you probably just want to use the defaults, which will give you
103 you probably just want to use the defaults, which will give you
104 exceptions on errors.
104 exceptions on errors.
105
105
106 """
106 """
107
107
108 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
108 # $Id: keepalive.py,v 1.14 2006/04/04 21:00:32 mstenner Exp $
109
109
110 from __future__ import absolute_import, print_function
110 from __future__ import absolute_import, print_function
111
111
112 import errno
112 import errno
113 import hashlib
113 import hashlib
114 import socket
114 import socket
115 import sys
115 import sys
116 import thread
116 import threading
117
117
118 from . import (
118 from . import (
119 util,
119 util,
120 )
120 )
121
121
122 httplib = util.httplib
122 httplib = util.httplib
123 urlerr = util.urlerr
123 urlerr = util.urlerr
124 urlreq = util.urlreq
124 urlreq = util.urlreq
125
125
# Global debug hook: assign a logging.Logger-like object (with .info/.error)
# to enable diagnostic output from this module.
DEBUG = None

# Before Python 2.4 the HTTP handler itself had to turn non-200 responses
# into errors; from 2.4 on HTTPErrorProcessor does that job (see the module
# docstring), so the default flips accordingly.
HANDLE_ERRORS = 1 if sys.version_info < (2, 4) else 0
131
131
class ConnectionManager(object):
    """Track pooled keep-alive connections.

    Maintains three mutually consistent maps, all guarded by one lock:
      _hostmap  -- host -> list of connections to that host
      _connmap  -- connection -> host
      _readymap -- connection -> ready flag (truthy = free for reuse)
    """
    def __init__(self):
        self._lock = threading.Lock()
        self._hostmap = {}   # map hosts to a list of connections
        self._connmap = {}   # map connections to host
        self._readymap = {}  # map connection to ready state

    def add(self, host, connection, ready):
        """Register a new connection for host with the given ready state."""
        with self._lock:
            self._hostmap.setdefault(host, []).append(connection)
            self._connmap[connection] = host
            self._readymap[connection] = ready

    def remove(self, connection):
        """Forget a connection entirely; unknown connections are a no-op."""
        with self._lock:
            try:
                host = self._connmap[connection]
            except KeyError:
                pass
            else:
                del self._connmap[connection]
                del self._readymap[connection]
                self._hostmap[host].remove(connection)
                # drop the host entry once its last connection is gone
                if not self._hostmap[host]:
                    del self._hostmap[host]

    def set_ready(self, connection, ready):
        """Mark a connection's ready state.

        Note: plain dict assignment cannot raise KeyError, so the
        try/except KeyError guard the original carried here was dead code.
        """
        self._readymap[connection] = ready

    def get_ready_conn(self, host):
        """Return a free connection to host (marking it busy), else None."""
        with self._lock:
            for conn in self._hostmap.get(host, []):
                if self._readymap[conn]:
                    self._readymap[conn] = 0
                    return conn
        return None

    def get_all(self, host=None):
        """Return a copy of the connection list for host, or, with no
        host, a copy of the whole host->connections mapping."""
        if host:
            return list(self._hostmap.get(host, []))
        else:
            return dict(self._hostmap)
194
194
class KeepAliveHandler(object):
    """Core of the keep-alive urllib2 handlers.

    Owns a ConnectionManager and reuses pooled connections in do_open().
    Mixed with a urlreq handler class (see HTTPHandler below), which
    supplies self.parent.
    """
    def __init__(self):
        self._cm = ConnectionManager()

    #### Connection Management
    def open_connections(self):
        """return a list of connected hosts and the number of connections
        to each.  [('foo.com:80', 2), ('bar.org', 1)]"""
        return [(host, len(li)) for (host, li) in self._cm.get_all().items()]

    def close_connection(self, host):
        """close connection(s) to <host>
        host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
        no error occurs if there is no connection to that host."""
        for h in self._cm.get_all(host):
            self._cm.remove(h)
            h.close()

    def close_all(self):
        """close all open connections"""
        # Use items() rather than the Python-2-only iteritems() for
        # consistency with open_connections() and Python 3 compatibility.
        # get_all() returns a copy, so removal during iteration is safe.
        for host, conns in self._cm.get_all().items():
            for h in conns:
                self._cm.remove(h)
                h.close()

    def _request_closed(self, request, host, connection):
        """tells us that this request is now closed and that the
        connection is ready for another request"""
        self._cm.set_ready(connection, 1)

    def _remove_connection(self, host, connection, close=0):
        """Drop a connection from the pool, optionally closing it first."""
        if close:
            connection.close()
        self._cm.remove(connection)

    #### Transaction Execution
    def http_open(self, req):
        """urllib2 entry point for plain HTTP requests."""
        return self.do_open(HTTPConnection, req)

    def do_open(self, http_class, req):
        """Perform req over a pooled connection, creating one if needed.

        Returns the (annotated) response object; raises urlerror on
        missing host or socket/HTTP-level failures.
        """
        host = req.get_host()
        if not host:
            raise urlerr.urlerror('no host given')

        try:
            h = self._cm.get_ready_conn(host)
            while h:
                r = self._reuse_connection(h, req, host)

                # if this response is non-None, then it worked and we're
                # done.  Break out, skipping the else block.
                if r:
                    break

                # connection is bad - possibly closed by server
                # discard it and ask for the next free connection
                h.close()
                self._cm.remove(h)
                h = self._cm.get_ready_conn(host)
            else:
                # while/else: we get here only when no (working) free
                # connection was found.  Create a new one.
                h = http_class(host)
                if DEBUG:
                    DEBUG.info("creating new connection to %s (%d)",
                               host, id(h))
                self._cm.add(host, h, 0)
                self._start_transaction(h, req)
                r = h.getresponse()
        except (socket.error, httplib.HTTPException) as err:
            raise urlerr.urlerror(err)

        # if not a persistent connection, don't try to reuse it
        if r.will_close:
            self._cm.remove(h)

        if DEBUG:
            DEBUG.info("STATUS: %s, %s", r.status, r.reason)
        # annotate the response so its close() can return the connection
        # to the pool (see HTTPResponse below)
        r._handler = self
        r._host = host
        r._url = req.get_full_url()
        r._connection = h
        r.code = r.status
        r.headers = r.msg
        r.msg = r.reason

        if r.status == 200 or not HANDLE_ERRORS:
            return r
        else:
            return self.parent.error('http', req, r,
                                     r.status, r.msg, r.headers)

    def _reuse_connection(self, h, req, host):
        """start the transaction with a re-used connection
        return a response object (r) upon success or None on failure.
        This DOES not close or remove bad connections in cases where
        it returns.  However, if an unexpected exception occurs, it
        will close and remove the connection before re-raising.
        """
        try:
            self._start_transaction(h, req)
            r = h.getresponse()
            # note: just because we got something back doesn't mean it
            # worked.  We'll check the version below, too.
        except (socket.error, httplib.HTTPException):
            r = None
        except: # re-raises
            # adding this block just in case we've missed
            # something we will still raise the exception, but
            # lets try and close the connection and remove it
            # first.  We previously got into a nasty loop
            # where an exception was uncaught, and so the
            # connection stayed open.  On the next try, the
            # same exception was raised, etc.  The trade-off is
            # that it's now possible this call will raise
            # a DIFFERENT exception
            if DEBUG:
                DEBUG.error("unexpected exception - closing "
                            "connection to %s (%d)", host, id(h))
            self._cm.remove(h)
            h.close()
            raise

        if r is None or r.version == 9:
            # httplib falls back to assuming HTTP 0.9 if it gets a
            # bad header back.  This is most likely to happen if
            # the socket has been closed by the server since we
            # last used the connection.
            if DEBUG:
                DEBUG.info("failed to re-use connection to %s (%d)",
                           host, id(h))
            r = None
        else:
            if DEBUG:
                DEBUG.info("re-using connection to %s (%d)", host, id(h))

        return r

    def _start_transaction(self, h, req):
        # What follows mostly reimplements HTTPConnection.request()
        # except it adds self.parent.addheaders in the mix.
        headers = req.headers.copy()
        if sys.version_info >= (2, 4):
            headers.update(req.unredirected_hdrs)
        headers.update(self.parent.addheaders)
        headers = dict((n.lower(), v) for n, v in headers.items())
        skipheaders = {}
        # suppress httplib's automatic Host/Accept-Encoding headers when
        # the request already carries them
        for n in ('host', 'accept-encoding'):
            if n in headers:
                skipheaders['skip_' + n.replace('-', '_')] = 1
        try:
            if req.has_data():
                data = req.get_data()
                h.putrequest('POST', req.get_selector(), **skipheaders)
                if 'content-type' not in headers:
                    h.putheader('Content-type',
                                'application/x-www-form-urlencoded')
                if 'content-length' not in headers:
                    h.putheader('Content-length', '%d' % len(data))
            else:
                h.putrequest('GET', req.get_selector(), **skipheaders)
        except socket.error as err:
            raise urlerr.urlerror(err)
        for k, v in headers.items():
            h.putheader(k, v)
        h.endheaders()
        if req.has_data():
            h.send(data)
362
362
class HTTPHandler(KeepAliveHandler, urlreq.httphandler):
    # Concrete keep-alive handler: KeepAliveHandler supplies the pooled
    # http_open()/do_open() machinery, urlreq.httphandler supplies the
    # urllib2 handler interface (including self.parent).
    pass
365
365
366 class HTTPResponse(httplib.HTTPResponse):
366 class HTTPResponse(httplib.HTTPResponse):
367 # we need to subclass HTTPResponse in order to
367 # we need to subclass HTTPResponse in order to
368 # 1) add readline() and readlines() methods
368 # 1) add readline() and readlines() methods
369 # 2) add close_connection() methods
369 # 2) add close_connection() methods
370 # 3) add info() and geturl() methods
370 # 3) add info() and geturl() methods
371
371
372 # in order to add readline(), read must be modified to deal with a
372 # in order to add readline(), read must be modified to deal with a
373 # buffer. example: readline must read a buffer and then spit back
373 # buffer. example: readline must read a buffer and then spit back
374 # one line at a time. The only real alternative is to read one
374 # one line at a time. The only real alternative is to read one
375 # BYTE at a time (ick). Once something has been read, it can't be
375 # BYTE at a time (ick). Once something has been read, it can't be
376 # put back (ok, maybe it can, but that's even uglier than this),
376 # put back (ok, maybe it can, but that's even uglier than this),
377 # so if you THEN do a normal read, you must first take stuff from
377 # so if you THEN do a normal read, you must first take stuff from
378 # the buffer.
378 # the buffer.
379
379
380 # the read method wraps the original to accommodate buffering,
380 # the read method wraps the original to accommodate buffering,
381 # although read() never adds to the buffer.
381 # although read() never adds to the buffer.
382 # Both readline and readlines have been stolen with almost no
382 # Both readline and readlines have been stolen with almost no
383 # modification from socket.py
383 # modification from socket.py
384
384
385
385
    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        # Initialize the base response.  NOTE(review): the base call is
        # positional; in Python 2's httplib the third parameter of
        # HTTPResponse.__init__ was 'strict', so 'method' may land in
        # that slot there -- confirm against the httplib this targets.
        httplib.HTTPResponse.__init__(self, sock, debuglevel, method)
        self.fileno = sock.fileno    # expose the socket's fileno directly
        self.code = None             # filled in by the handler (see do_open)
        self._rbuf = ''              # read-ahead buffer used by readline()
        self._rbufsize = 8096        # raw read size when refilling _rbuf
        self._handler = None         # inserted by the handler later
        self._host = None            # (same)
        self._url = None             # (same)
        self._connection = None      # (same)

    # keep a handle on the base class's unbuffered read so the buffered
    # read()/readline() below can delegate to it
    _raw_read = httplib.HTTPResponse.read
398
398
399 def close(self):
399 def close(self):
400 if self.fp:
400 if self.fp:
401 self.fp.close()
401 self.fp.close()
402 self.fp = None
402 self.fp = None
403 if self._handler:
403 if self._handler:
404 self._handler._request_closed(self, self._host,
404 self._handler._request_closed(self, self._host,
405 self._connection)
405 self._connection)
406
406
    def close_connection(self):
        """Close the underlying connection for good (remove it from the
        pool rather than returning it), then close this response."""
        # order matters: drop the connection from the pool first so
        # close() below cannot hand it back for reuse
        self._handler._remove_connection(self._host, self._connection, close=1)
        self.close()
410
410
    def info(self):
        """Mimic urllib's addinfourl interface: return the headers."""
        return self.headers
413
413
    def geturl(self):
        """Mimic urllib's addinfourl interface: return the request URL."""
        return self._url
416
416
    def read(self, amt=None):
        """Read up to amt bytes (everything when amt is None), consuming
        any data left in the readline() buffer first."""
        # the _rbuf test is only in this first if for speed.  It's not
        # logically necessary
        if self._rbuf and not amt is None:
            L = len(self._rbuf)
            if amt > L:
                # buffer is not enough: keep it and shrink the amount
                # still needed from the raw stream below
                amt -= L
            else:
                # buffer alone satisfies the request
                s = self._rbuf[:amt]
                self._rbuf = self._rbuf[amt:]
                return s

        # drain the (possibly empty) buffer and top up from the raw read
        s = self._rbuf + self._raw_read(amt)
        self._rbuf = ''
        return s
432
432
    # stolen from Python SVN #68532 to fix issue1088
    def _read_chunked(self, amt):
        """Read from a chunked transfer-encoded body.

        Returns up to amt bytes (all remaining data when amt is None).
        self.chunk_left tracks how many bytes remain unread in the
        current chunk across calls.
        """
        chunk_left = self.chunk_left
        value = ''

        # XXX This accumulates chunks by repeated string concatenation,
        # which is not efficient as the number or size of chunks gets big.
        while True:
            if chunk_left is None:
                # at a chunk boundary: parse the "size[;extensions]" line
                line = self.fp.readline()
                i = line.find(';')
                if i >= 0:
                    line = line[:i] # strip chunk-extensions
                try:
                    chunk_left = int(line, 16)
                except ValueError:
                    # close the connection as protocol synchronization is
                    # probably lost
                    self.close()
                    raise httplib.IncompleteRead(value)
                if chunk_left == 0:
                    # a zero-length chunk terminates the body
                    break
            if amt is None:
                value += self._safe_read(chunk_left)
            elif amt < chunk_left:
                # caller wants less than this chunk holds; remember the
                # leftover for the next call and return early
                value += self._safe_read(amt)
                self.chunk_left = chunk_left - amt
                return value
            elif amt == chunk_left:
                value += self._safe_read(amt)
                self._safe_read(2)  # toss the CRLF at the end of the chunk
                self.chunk_left = None
                return value
            else:
                value += self._safe_read(chunk_left)
                amt -= chunk_left

            # we read the whole chunk, get another
            self._safe_read(2)      # toss the CRLF at the end of the chunk
            chunk_left = None

        # read and discard trailer up to the CRLF terminator
        ### note: we shouldn't have any trailers!
        while True:
            line = self.fp.readline()
            if not line:
                # a vanishingly small number of sites EOF without
                # sending the trailer
                break
            if line == '\r\n':
                break

        # we read everything; close the "file"
        self.close()

        return value
489
489
    def readline(self, limit=-1):
        """Read one line, up to limit bytes when 0 <= limit, refilling
        the internal read-ahead buffer self._rbuf as needed."""
        i = self._rbuf.find('\n')
        # pull raw data until a newline appears or the buffer already
        # holds at least 'limit' bytes
        while i < 0 and not (0 < limit <= len(self._rbuf)):
            new = self._raw_read(self._rbufsize)
            if not new:
                # EOF on the raw stream
                break
            i = new.find('\n')
            if i >= 0:
                # convert to a position within the combined buffer
                i = i + len(self._rbuf)
            self._rbuf = self._rbuf + new
        if i < 0:
            # no newline: return everything buffered
            i = len(self._rbuf)
        else:
            # include the newline itself
            i = i + 1
        if 0 <= limit < len(self._rbuf):
            # cap the returned slice at the requested limit
            i = limit
        data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
        return data
508
508
def readlines(self, sizehint=0):
    """Read lines until EOF, or until `sizehint` bytes have been read.

    Returns the collected lines as a list.  A falsy `sizehint` means
    "read everything"; otherwise we stop as soon as the running total
    of bytes reaches the hint.
    """
    collected = []
    consumed = 0
    while True:
        line = self.readline()
        if not line:
            return collected
        collected.append(line)
        consumed += len(line)
        if sizehint and consumed >= sizehint:
            return collected
521
521
def safesend(self, str):
    """Send `str' to the server.

    Shamelessly ripped off from httplib to patch a bad behavior.
    """
    # _broken_pipe_resp is an attribute we set in this function
    # if the socket is closed while we're sending data but
    # the server sent us a response before hanging up.
    # In that case, we want to pretend to send the rest of the
    # outgoing data, and then let the user use getresponse()
    # (which we wrap) to get this last response before
    # opening a new socket.
    if getattr(self, '_broken_pipe_resp', None) is not None:
        return

    # Lazily (re)connect if allowed, mirroring httplib's behavior.
    if self.sock is None:
        if self.auto_open:
            self.connect()
        else:
            raise httplib.NotConnected

    # send the data to the server. if we get a broken pipe, then close
    # the socket. we want to reconnect when somebody tries to send again.
    #
    # NOTE: we DO propagate the error, though, because we cannot simply
    # ignore the error... the caller will know if they can retry.
    if self.debuglevel > 0:
        print("send:", repr(str))
    try:
        blocksize = 8192
        # `str` may be a file-like object; stream it out in blocks.
        read = getattr(str, 'read', None)
        if read is not None:
            if self.debuglevel > 0:
                print("sending a read()able")
            data = read(blocksize)
            while data:
                self.sock.sendall(data)
                data = read(blocksize)
        else:
            self.sock.sendall(str)
    except socket.error as v:
        # Python 2 socket.error supports indexing for the errno.
        reraise = True
        if v[0] == errno.EPIPE: # Broken pipe
            if self._HTTPConnection__state == httplib._CS_REQ_SENT:
                # The server hung up mid-send but may already have
                # queued a response; capture it so the wrapped
                # getresponse() can hand it back, and swallow EPIPE.
                self._broken_pipe_resp = None
                self._broken_pipe_resp = self.getresponse()
                reraise = False
            self.close()
        if reraise:
            raise
572
572
def wrapgetresponse(cls):
    """Wraps getresponse in cls with a broken-pipe sane version.
    """
    def safegetresponse(self):
        # safesend() may have stashed a response in _broken_pipe_resp
        # when the server hung up mid-send; in that case the socket is
        # already closed and we just hand that response back.  Otherwise
        # defer to the normal response path.
        cached = getattr(self, '_broken_pipe_resp', None)
        if cached is None:
            return cls.getresponse(self)
        return cached
    safegetresponse.__doc__ = cls.getresponse.__doc__
    return safegetresponse
587
587
class HTTPConnection(httplib.HTTPConnection):
    # use the modified response class
    response_class = HTTPResponse
    # send() that survives EPIPE when the server already responded
    send = safesend
    # getresponse() that returns the response captured by safesend()
    getresponse = wrapgetresponse(httplib.HTTPConnection)
593
593
594
594
595 #########################################################################
595 #########################################################################
596 ##### TEST FUNCTIONS
596 ##### TEST FUNCTIONS
597 #########################################################################
597 #########################################################################
598
598
def error_handler(url):
    """Fetch `url` with fancy error handling toggled off and on.

    Installs the keepalive handler globally, reports status/reason for
    each mode, then restores HANDLE_ERRORS and closes all connections.
    """
    global HANDLE_ERRORS
    saved = HANDLE_ERRORS
    handler = HTTPHandler()
    urlreq.installopener(urlreq.buildopener(handler))
    labels = {0: 'off', 1: 'on'}
    for mode in (0, 1):
        print(" fancy error handling %s (HANDLE_ERRORS = %i)" % (labels[mode], mode))
        HANDLE_ERRORS = mode
        try:
            fo = urlreq.urlopen(url)
            fo.read()
            fo.close()
            try:
                status, reason = fo.status, fo.reason
            except AttributeError:
                status, reason = None, None
        except IOError as e:
            print(" EXCEPTION: %s" % e)
            raise
        else:
            print(" status = %s, reason = %s" % (status, reason))
    HANDLE_ERRORS = saved
    print("open connections:", handler.open_connections())
    handler.close_all()
626
626
def continuity(url):
    """Verify the keepalive handler returns the same bytes as stock urllib.

    Prints an md5 digest for a plain fetch, a keepalive read(), and a
    keepalive readline() loop; all three should match.
    """
    md5 = hashlib.md5
    format = '%25s: %s'

    # first fetch the file with the normal http handler
    urlreq.installopener(urlreq.buildopener())
    fo = urlreq.urlopen(url)
    baseline = fo.read()
    fo.close()
    print(format % ('normal urllib', md5(baseline).hexdigest()))

    # now install the keepalive handler and try again
    urlreq.installopener(urlreq.buildopener(HTTPHandler()))

    fo = urlreq.urlopen(url)
    whole = fo.read()
    fo.close()
    print(format % ('keepalive read', md5(whole).hexdigest()))

    # same thing again, but line by line
    fo = urlreq.urlopen(url)
    pieces = []
    while True:
        line = fo.readline()
        if not line:
            break
        pieces.append(line)
    fo.close()
    print(format % ('keepalive readline', md5(''.join(pieces)).hexdigest()))
660
660
def comp(N, url):
    """Compare fetch times: stock urllib handlers vs the keepalive handler."""
    print(' making %i connections to:\n %s' % (N, url))

    sys.stdout.write(' first using the normal urllib handlers')
    # first use normal opener
    urlreq.installopener(urlreq.buildopener())
    plain_time = fetch(N, url)
    print(' TIME: %.3f s' % plain_time)

    sys.stdout.write(' now using the keepalive handler ')
    # now install the keepalive handler and try again
    urlreq.installopener(urlreq.buildopener(HTTPHandler()))
    keepalive_time = fetch(N, url)
    print(' TIME: %.3f s' % keepalive_time)
    print(' improvement factor: %.2f' % (plain_time / keepalive_time))
678
678
def fetch(N, url, delay=0):
    """Fetch `url` N times, sleeping `delay` seconds between fetches.

    Returns the elapsed wall-clock time; warns on stdout if any read
    returned a different length than the first one.
    """
    import time
    sizes = []
    started = time.time()
    for attempt in range(N):
        if delay and attempt > 0:
            time.sleep(delay)
        fo = urlreq.urlopen(url)
        body = fo.read()
        fo.close()
        sizes.append(len(body))
    elapsed = time.time() - started

    # All reads should have returned the same number of bytes.
    for idx, size in enumerate(sizes[1:], 1):
        if size != sizes[0]:
            print("WARNING: inconsistent length on read %i: %i" % (idx, size))

    return elapsed
699
699
def test_timeout(url):
    """Fetch `url`, wait for the server to drop the idle connection,
    fetch again, and verify the two payloads are identical.

    Temporarily replaces the module-level DEBUG logger with one that
    prints to stdout so the transparent reconnect is visible.
    """
    # BUG FIX: `time` was only bound at module scope under the
    # __main__ guard, so calling this function from an importing
    # module raised NameError on time.sleep(); import it locally.
    import time
    global DEBUG
    dbbackup = DEBUG
    class FakeLogger(object):
        # Minimal logger stand-in: every level routes to stdout.
        def debug(self, msg, *args):
            print(msg % args)
        info = warning = error = debug
    DEBUG = FakeLogger()
    print(" fetching the file to establish a connection")
    fo = urlreq.urlopen(url)
    data1 = fo.read()
    fo.close()

    i = 20
    print(" waiting %i seconds for the server to close the connection" % i)
    while i > 0:
        # Countdown on one line so the wait is visible but compact.
        sys.stdout.write('\r %2i' % i)
        sys.stdout.flush()
        time.sleep(1)
        i -= 1
    sys.stderr.write('\r')

    print(" fetching the file a second time")
    fo = urlreq.urlopen(url)
    data2 = fo.read()
    fo.close()

    if data1 == data2:
        print(' data are identical')
    else:
        print(' ERROR: DATA DIFFER')

    DEBUG = dbbackup
733
733
734
734
def test(url, N=10):
    """Run the full test battery (error handling, continuity, speed,
    dropped-connection) against `url` with N fetches for the timing."""
    print("checking error handler (do this on a non-200)")
    try:
        error_handler(url)
    except IOError:
        print("exiting - exception will prevent further tests")
        sys.exit()
    # Remaining phases all follow the same blank-line + banner pattern.
    phases = (
        ("performing continuity test (making sure stuff isn't corrupted)",
         lambda: continuity(url)),
        ("performing speed comparison",
         lambda: comp(N, url)),
        ("performing dropped-connection check",
         lambda: test_timeout(url)),
    )
    for banner, run in phases:
        print('')
        print(banner)
        run()
750
750
if __name__ == '__main__':
    # usage: keepalive.py <integer> <url>
    import time
    try:
        count, target = int(sys.argv[1]), sys.argv[2]
    except (IndexError, ValueError):
        # Missing arguments or a non-integer count: show usage.
        print("%s <integer> <url>" % sys.argv[0])
    else:
        test(target, count)
General Comments 0
You need to be logged in to leave comments. Login now