upstream/mercurial-mirror Commit - r37607:e320d940

1

# hgweb/request.py - An http request from either CGI or the standalone server.

1

# hgweb/request.py - An http request from either CGI or the standalone server.

2

#

2

#

3

4

5

#

5

#

6

# This software may be used and distributed according to the terms of the

6

# This software may be used and distributed according to the terms of the

7

# GNU General Public License version 2 or any later version.

7

# GNU General Public License version 2 or any later version.

8

9

from __future__ import absolute_import

9

from __future__ import absolute_import

10

11

import wsgiref.headers as wsgiheaders

11

import wsgiref.headers as wsgiheaders

12

#import wsgiref.validate

12

#import wsgiref.validate

13

14

from ..thirdparty import (

14

from ..thirdparty import (

15

attr,

15

attr,

16

)

16

)

17

from .. import (

17

from .. import (

18

error,

18

error,

19

pycompat,

19

pycompat,

20

util,

20

util,

21

)

21

)

22

23

class multidict(object):
    """A dict like object that can store multiple values for a key.

    Used to store parsed request parameters.

    This is inspired by WebOb's class of the same name.

    Internally every key maps to a list of values, kept in the order in
    which they were added.
    """
    def __init__(self):
        # Maps key -> list of values, oldest first.
        self._items = {}

    def __getitem__(self, key):
        """Returns the last set value for a key.

        Raises KeyError if the key is not defined.
        """
        return self._items[key][-1]

    def __setitem__(self, key, value):
        """Replace all values for a key with a single new value."""
        self._items[key] = [value]

    def __delitem__(self, key):
        """Delete all values for a key.

        Raises KeyError if the key is not defined.
        """
        del self._items[key]

    def __contains__(self, key):
        """Report whether at least one value is stored for a key."""
        return key in self._items

    def __len__(self):
        """Return the number of distinct keys (not the number of values)."""
        return len(self._items)

    def get(self, key, default=None):
        """Return the last set value for a key, or ``default`` if missing."""
        try:
            return self.__getitem__(key)
        except KeyError:
            return default

    def add(self, key, value):
        """Add a new value for a key. Does not replace existing values."""
        self._items.setdefault(key, []).append(value)

    def getall(self, key):
        """Obtains all values for a key.

        Returns an empty list if the key is not defined. For a defined key
        the returned list is the internal storage, not a copy.
        """
        return self._items.get(key, [])

    def getone(self, key):
        """Obtain a single value for a key.

        Raises KeyError if key not defined or it has multiple values set.
        """
        vals = self._items[key]

        if len(vals) > 1:
            raise KeyError('multiple values for %r' % key)

        return vals[0]

    def asdictoflists(self):
        """Return a plain dict mapping each key to a copy of its value list."""
        # ``items()`` exists on both Python 2 and Python 3 dicts, whereas
        # ``iteritems()`` is Python 2 only.
        return {k: list(v) for k, v in self._items.items()}

79

80

@attr.s(frozen=True)
class parsedrequest(object):
    """Immutable record describing a parsed WSGI request.

    Bundles the values derived from the WSGI environment together with
    a handle on the request body stream.
    """

    # HTTP request method.
    method = attr.ib()

    # Complete URL of the request.
    url = attr.ib()

    # URL with no path components: just <proto>://<host><port>.
    baseurl = attr.ib()

    # Advertised counterparts of ``url`` and ``baseurl``. These use
    # SERVER_NAME rather than the HTTP Host header for the hostname.
    # This is likely what clients used.
    advertisedurl = attr.ib()
    advertisedbaseurl = attr.ib()

    # Scheme of the URL, i.e. the part before ``://`` (``http`` or
    # ``https``, for example).
    urlscheme = attr.ib()

    # REMOTE_USER value when set, otherwise None.
    remoteuser = attr.ib()

    # REMOTE_HOST value when set, otherwise None.
    remotehost = attr.ib()

    # Relative path of the WSGI application. Begins with ``/`` when
    # defined.
    apppath = attr.ib()

    # Path components used for dispatching, as a list.
    dispatchparts = attr.ib()

    # Path portion of the URL (query string excluded) used for dispatch.
    # ``None`` means the request carried no path component, the empty
    # string means the application's root URL was requested, and any
    # other value is the requested path under the application, without
    # a leading ``/``.
    dispatchpath = attr.ib()

    # Name of the repository this request targets.
    reponame = attr.ib()

    # Unparsed query string (what follows "?" in the URL).
    querystring = attr.ib()

    # Query string parameters as a multidict.
    qsparams = attr.ib()

    # Request headers as a wsgiref.headers.Headers instance, which behaves
    # like a dict whose keys are case insensitive.
    headers = attr.ib()

    # Input stream for the request body.
    bodyfh = attr.ib()

    # The WSGI environment dict, unmodified.
    rawenv = attr.ib()

127

128

def parserequestfromenv(env, reponame=None, altbaseurl=None):
    """Parse URL components from environment variables.

    WSGI defines request attributes via environment variables. This function
    parses the environment variables into a data structure.

    If ``reponame`` is defined, the leading path components matching that
    string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
    This simulates the world view of a WSGI application that processes
    requests from the base URL of a repo.

    If ``altbaseurl`` (typically comes from ``web.baseurl`` config option)
    is defined, it is used - instead of the WSGI environment variables - for
    constructing URL components up to and including the WSGI application path.
    For example, if the current WSGI application is at ``/repo`` and a request
    is made to ``/rev/@`` with this argument set to
    ``http://myserver:9000/prefix``, the URL and path components will resolve as
    if the request were to ``http://myserver:9000/prefix/rev/@``. In other
    words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and
    ``SCRIPT_NAME`` are all effectively replaced by components from this URL.

    Returns a ``parsedrequest`` instance.

    Raises ``error.ProgrammingError`` if ``reponame`` is given and
    ``PATH_INFO`` is missing/empty, does not start with the repo name, or
    the repo name prefix does not end at a path delimiter.
    """
    # PEP 3333 defines the WSGI spec and is a useful reference for this code.

    # We first validate that the incoming object conforms with the WSGI spec.
    # We only want to be dealing with spec-conforming WSGI implementations.
    # TODO enable this once we fix internal violations.
    #wsgiref.validate.check_environ(env)

    # PEP-0333 states that environment keys and values are native strings
    # (bytes on Python 2 and str on Python 3). The code points for the Unicode
    # strings on Python 3 must be between \00000-\000FF. We deal with bytes
    # in Mercurial, so mass convert string keys and values to bytes.
    #
    # ``items()`` is used rather than ``iteritems()`` because the latter does
    # not exist on Python 3 dicts, which is exactly the case handled here.
    if pycompat.ispy3:
        env = {k.encode('latin-1'): v for k, v in env.items()}
        env = {k: v.encode('latin-1') if isinstance(v, str) else v
               for k, v in env.items()}

    if altbaseurl:
        altbaseurl = util.url(altbaseurl)

    # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
    # the environment variables.
    # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
    # how URLs are reconstructed.
    fullurl = env['wsgi.url_scheme'] + '://'

    if altbaseurl and altbaseurl.scheme:
        advertisedfullurl = altbaseurl.scheme + '://'
    else:
        advertisedfullurl = fullurl

    def addport(s, port):
        # Append ``:<port>`` unless it is the default port for the scheme
        # that ``s`` starts with (443 for https, 80 otherwise).
        if s.startswith('https://'):
            if port != '443':
                s += ':' + port
        else:
            if port != '80':
                s += ':' + port

        return s

    # The Host header, when present, is used as-is and no port is appended
    # to it here.
    if env.get('HTTP_HOST'):
        fullurl += env['HTTP_HOST']
    else:
        fullurl += env['SERVER_NAME']
        fullurl = addport(fullurl, env['SERVER_PORT'])

    if altbaseurl and altbaseurl.host:
        advertisedfullurl += altbaseurl.host

        if altbaseurl.port:
            port = altbaseurl.port
        elif altbaseurl.scheme == 'http' and not altbaseurl.port:
            port = '80'
        elif altbaseurl.scheme == 'https' and not altbaseurl.port:
            port = '443'
        else:
            port = env['SERVER_PORT']

        advertisedfullurl = addport(advertisedfullurl, port)
    else:
        advertisedfullurl += env['SERVER_NAME']
        advertisedfullurl = addport(advertisedfullurl, env['SERVER_PORT'])

    # Snapshot the URLs before any path components are appended.
    baseurl = fullurl
    advertisedbaseurl = advertisedfullurl

    fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
    fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))

    if altbaseurl:
        path = altbaseurl.path or ''
        if path and not path.startswith('/'):
            path = '/' + path
        advertisedfullurl += util.urlreq.quote(path)
    else:
        advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))

    advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))

    if env.get('QUERY_STRING'):
        fullurl += '?' + env['QUERY_STRING']
        advertisedfullurl += '?' + env['QUERY_STRING']

    # If ``reponame`` is defined, that must be a prefix on PATH_INFO
    # that represents the repository being dispatched to. When computing
    # the dispatch info, we ignore these leading path components.

    if altbaseurl:
        apppath = altbaseurl.path or ''
        if apppath and not apppath.startswith('/'):
            apppath = '/' + apppath
    else:
        apppath = env.get('SCRIPT_NAME', '')

    if reponame:
        repoprefix = '/' + reponame.strip('/')

        if not env.get('PATH_INFO'):
            raise error.ProgrammingError('reponame requires PATH_INFO')

        if not env['PATH_INFO'].startswith(repoprefix):
            raise error.ProgrammingError('PATH_INFO does not begin with repo '
                                         'name: %s (%s)' % (env['PATH_INFO'],
                                                            reponame))

        dispatchpath = env['PATH_INFO'][len(repoprefix):]

        if dispatchpath and not dispatchpath.startswith('/'):
            raise error.ProgrammingError('reponame prefix of PATH_INFO does '
                                         'not end at path delimiter: %s (%s)' %
                                         (env['PATH_INFO'], reponame))

        apppath = apppath.rstrip('/') + repoprefix
        dispatchparts = dispatchpath.strip('/').split('/')
        dispatchpath = '/'.join(dispatchparts)

    elif 'PATH_INFO' in env:
        if env['PATH_INFO'].strip('/'):
            dispatchparts = env['PATH_INFO'].strip('/').split('/')
            dispatchpath = '/'.join(dispatchparts)
        else:
            dispatchparts = []
            dispatchpath = ''
    else:
        dispatchparts = []
        dispatchpath = None

    querystring = env.get('QUERY_STRING', '')

    # A multidict keeps every value supplied for a repeated key (in the
    # order they were added) while still allowing dict-style lookup.
    qsparams = multidict()
    for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
        qsparams.add(k, v)

    # HTTP_* keys contain HTTP request headers. The Headers structure should
    # perform case normalization for us. We just rewrite underscore to dash
    # so keys match what likely went over the wire.
    headers = []
    for k, v in env.items():
        if k.startswith('HTTP_'):
            headers.append((k[len('HTTP_'):].replace('_', '-'), v))

    headers = wsgiheaders.Headers(headers)

    # This is kind of a lie because the HTTP header wasn't explicitly
    # sent. But for all intents and purposes it should be OK to lie about
    # this, since a consumer will read either value to determine how many
    # bytes are available to read.
    if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
        headers['Content-Length'] = env['CONTENT_LENGTH']

    if 'CONTENT_TYPE' in env and 'HTTP_CONTENT_TYPE' not in env:
        headers['Content-Type'] = env['CONTENT_TYPE']

    # When a length is known, wrap the input stream in a reader capped at
    # that many bytes.
    bodyfh = env['wsgi.input']
    if 'Content-Length' in headers:
        bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))

    return parsedrequest(method=env['REQUEST_METHOD'],
                         url=fullurl, baseurl=baseurl,
                         advertisedurl=advertisedfullurl,
                         advertisedbaseurl=advertisedbaseurl,
                         urlscheme=env['wsgi.url_scheme'],
                         remoteuser=env.get('REMOTE_USER'),
                         remotehost=env.get('REMOTE_HOST'),
                         apppath=apppath,
                         dispatchparts=dispatchparts, dispatchpath=dispatchpath,
                         reponame=reponame,
                         querystring=querystring,
                         qsparams=qsparams,
                         headers=headers,
                         bodyfh=bodyfh,
                         rawenv=env)

323

324

class offsettrackingwriter(object):
    """Append-only, file-like wrapper around a write callable.

    Each instance is bound to a callable, which receives the data of
    every ``write()`` call. The instance keeps a running total of the
    amount written so that ``tell()`` can be answered.

    This exists to wrap the ``write()`` function handed back by a WSGI
    ``start_response()`` function: that ``write()`` is a bare callable
    rather than a file object, so it lacks the other file object methods.
    """
    def __init__(self, writefn):
        self._offset = 0
        self._write = writefn

    def write(self, s):
        """Pass ``s`` to the wrapped callable and advance the offset."""
        written = self._write(s)
        # Not every callable reports how much it wrote; when it returns
        # None, count the full length of the data instead.
        self._offset += len(s) if written is None else written

    def flush(self):
        """Do nothing; present only for file object compatibility."""
        pass

    def tell(self):
        """Return the total amount written so far."""
        return self._offset

354

355

class wsgiresponse(object):

355

class wsgiresponse(object):

356

"""Represents a response to a WSGI request.

356

"""Represents a response to a WSGI request.

357

358

A response consists of a status line, headers, and a body.

358

A response consists of a status line, headers, and a body.

359

360

Consumers must populate the ``status`` and ``headers`` fields and

360

Consumers must populate the ``status`` and ``headers`` fields and

361

make a call to a ``setbody*()`` method before the response can be

361

make a call to a ``setbody*()`` method before the response can be

362

issued.

362

issued.

363

364

When it is time to start sending the response over the wire,

364

When it is time to start sending the response over the wire,

365

``sendresponse()`` is called. It handles emitting the header portion

365

``sendresponse()`` is called. It handles emitting the header portion

366

of the response message. It then yields chunks of body data to be

366

of the response message. It then yields chunks of body data to be

367

written to the peer. Typically, the WSGI application itself calls

367

written to the peer. Typically, the WSGI application itself calls

368

and returns the value from ``sendresponse()``.

368

and returns the value from ``sendresponse()``.

369

"""

369

"""

370

371

def __init__(self, req, startresponse):

371

def __init__(self, req, startresponse):

372

"""Create an empty response tied to a specific request.

372

"""Create an empty response tied to a specific request.

373

374

``req`` is a ``parsedrequest``. ``startresponse`` is the

374

``req`` is a ``parsedrequest``. ``startresponse`` is the

375

``start_response`` function passed to the WSGI application.

375

``start_response`` function passed to the WSGI application.

376

"""

376

"""

377

self._req = req

377

self._req = req

378

self._startresponse = startresponse

378

self._startresponse = startresponse

379

380

self.status = None

380

self.status = None

381

self.headers = wsgiheaders.Headers([])

381

self.headers = wsgiheaders.Headers([])

382

383

self._bodybytes = None

383

self._bodybytes = None

384

self._bodygen = None

384

self._bodygen = None

385

self._bodywillwrite = False

385

self._bodywillwrite = False

386

self._started = False

386

self._started = False

387

self._bodywritefn = None

387

self._bodywritefn = None

388

389

def _verifybody(self):

389

def _verifybody(self):

390

if (self._bodybytes is not None or self._bodygen is not None

390

if (self._bodybytes is not None or self._bodygen is not None

391

or self._bodywillwrite):

391

or self._bodywillwrite):

392

raise error.ProgrammingError('cannot define body multiple times')

392

raise error.ProgrammingError('cannot define body multiple times')

393

394

def setbodybytes(self, b):

394

def setbodybytes(self, b):

395

"""Define the response body as static bytes.

395

"""Define the response body as static bytes.

396

397

The empty string signals that there is no response body.

397

The empty string signals that there is no response body.

398

"""

398

"""

399

self._verifybody()

399

self._verifybody()

400

self._bodybytes = b

400

self._bodybytes = b

401

self.headers['Content-Length'] = '%d' % len(b)

401

self.headers['Content-Length'] = '%d' % len(b)

402

403

def setbodygen(self, gen):

403

def setbodygen(self, gen):

404

"""Define the response body as a generator of bytes."""

404

"""Define the response body as a generator of bytes."""

405

self._verifybody()

405

self._verifybody()

406

self._bodygen = gen

406

self._bodygen = gen

407

408

def setbodywillwrite(self):

408

def setbodywillwrite(self):

409

"""Signal an intent to use write() to emit the response body.

409

"""Signal an intent to use write() to emit the response body.

410

411

**This is the least preferred way to send a body.**

411

**This is the least preferred way to send a body.**

412

413

It is preferred for WSGI applications to emit a generator of chunks

413

It is preferred for WSGI applications to emit a generator of chunks

414

constituting the response body. However, some consumers can't emit

414

constituting the response body. However, some consumers can't emit

415

data this way. So, WSGI provides a way to obtain a ``write(data)``

415

data this way. So, WSGI provides a way to obtain a ``write(data)``

416

function that can be used to synchronously perform an unbuffered

416

function that can be used to synchronously perform an unbuffered

417

write.

417

write.

418

419

Calling this function signals an intent to produce the body in this

419

Calling this function signals an intent to produce the body in this

420

manner.

420

manner.

421

"""

421

"""

422

self._verifybody()

422

self._verifybody()

423

self._bodywillwrite = True

423

self._bodywillwrite = True

424

425

def sendresponse(self):
    """Send the generated response to the client.

    Before this is called, ``status`` must be set and one of
    ``setbodybytes()``, ``setbodygen()`` or ``setbodywillwrite()`` must
    have been called.

    This is a generator. Nothing happens until it is advanced. It yields
    the chunks of the response body; when the body is empty or will be
    produced through ``write()``, it yields nothing.

    Calling this method multiple times is not allowed.

    Raises ``error.ProgrammingError`` when called more than once, when the
    status line or the body is undefined, or when a 304 response carries a
    disallowed header or a non-bytes body.
    """
    if self._started:
        raise error.ProgrammingError('sendresponse() called multiple times')

    self._started = True

    if not self.status:
        raise error.ProgrammingError('status line not defined')

    if (self._bodybytes is None and self._bodygen is None
        and not self._bodywillwrite):
        raise error.ProgrammingError('response body not defined')

    # RFC 7232 Section 4.1 states that a 304 MUST generate one of
    # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
    # and SHOULD NOT generate other headers unless they could be used
    # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
    # states that no response body can be issued. Content-Length can
    # be sent. But if it is present, it should be the size of the response
    # that wasn't transferred.
    if self.status.startswith('304 '):
        # setbodybytes('') will set C-L to 0. This doesn't conform with the
        # spec. So remove it.
        if self.headers.get('Content-Length') == '0':
            del self.headers['Content-Length']

        # Strictly speaking, this is too strict. But until it causes
        # problems, let's be strict.
        badheaders = {k for k in self.headers.keys()
                      if k.lower() not in ('date', 'etag', 'expires',
                                           'cache-control',
                                           'content-location',
                                           'vary')}
        if badheaders:
            raise error.ProgrammingError(
                'illegal header on 304 response: %s' %
                ', '.join(sorted(badheaders)))

        if self._bodygen is not None or self._bodywillwrite:
            raise error.ProgrammingError("must use setbodybytes('') with "
                                         "304 responses")

    # Various HTTP clients (notably httplib) won't read the HTTP response
    # until the HTTP request has been sent in full. If servers (us) send a
    # response before the HTTP request has been fully sent, the connection
    # may deadlock because neither end is reading.
    #
    # We work around this by "draining" the request data before
    # sending any response in some conditions.
    drain = False
    close = False

    # If the client sent Expect: 100-continue, we assume it is smart enough
    # to deal with the server sending a response before reading the request.
    # (httplib doesn't do this.)
    if self._req.headers.get('Expect', '').lower() == '100-continue':
        pass
    # Only tend to request methods that have bodies. Strictly speaking,
    # we should sniff for a body. But this is fine for our existing
    # WSGI applications.
    elif self._req.method not in ('POST', 'PUT'):
        pass
    else:
        # If we don't know how much data to read, there's no guarantee
        # that we can drain the request responsibly. The WSGI
        # specification only says that servers *should* ensure the
        # input stream doesn't overrun the actual request. So there's
        # no guarantee that reading until EOF won't corrupt the stream
        # state.
        if not isinstance(self._req.bodyfh, util.cappedreader):
            close = True
        else:
            # We /could/ only drain certain HTTP response codes. But 200 and
            # non-200 wire protocol responses both require draining. Since
            # we have a capped reader in place for all situations where we
            # drain, it is safe to read from that stream. We'll either do
            # a drain or no-op if we're already at EOF.
            drain = True

    if close:
        self.headers['Connection'] = 'Close'

    if drain:
        assert isinstance(self._req.bodyfh, util.cappedreader)
        # Read and discard the remaining request body in 32 KiB chunks.
        while True:
            chunk = self._req.bodyfh.read(32768)
            if not chunk:
                break

    # Header names and values are converted with pycompat.strurl() and the
    # status line with pycompat.sysstr() before being handed to the WSGI
    # start_response callable.
    strheaders = [(pycompat.strurl(k), pycompat.strurl(v)) for
                  k, v in self.headers.items()]
    write = self._startresponse(pycompat.sysstr(self.status),
                                strheaders)

    if self._bodybytes:
        yield self._bodybytes
    elif self._bodygen:
        for chunk in self._bodygen:
            yield chunk
    elif self._bodywillwrite:
        self._bodywritefn = write
    elif self._bodybytes is not None or self._bodygen is not None:
        # A body was defined but is empty (e.g. ``setbodybytes('')``, which
        # is required for 304 responses). There is nothing to emit.
        pass
    else:
        # The exception used to be constructed here without ``raise``, so
        # it was silently discarded. The guard near the top of this method
        # should make this branch unreachable; fail loudly if it is not.
        raise error.ProgrammingError('do not know how to send body')

533

535

534

def getbodyfile(self):
    """Return a file-like object that writes to the response body.

    ``setbodywillwrite()`` and then ``sendresponse()`` must be called
    before this method. ``sendresponse()`` is a generator, so it does not
    run to completion until it is advanced. In this mode it yields no
    items; the simplest way to consume it is
    ``list(res.sendresponse())``, which should evaluate to ``[]``.

    Raises ``error.ProgrammingError`` if either prerequisite call has not
    been made.
    """
    if not self._bodywillwrite:
        raise error.ProgrammingError('must call setbodywillwrite() first')

    if not self._started:
        message = ('must call sendresponse() first; did '
                   'you remember to consume it since it '
                   'is a generator?')
        raise error.ProgrammingError(message)

    # The write callable is stored by sendresponse() once it has run.
    writefn = self._bodywritefn
    assert writefn
    return offsettrackingwriter(writefn)

554

556

555

def wsgiapplication(app_maker):
    """Wrap the application built by ``app_maker`` in a WSGI callable.

    Kept for compatibility with old CGI scripts. A plain hgweb() or
    hgwebdir() can and should now be used as a WSGI application directly.

    ``app_maker`` is called exactly once, immediately. The returned
    function forwards ``(env, respond)`` to the application it produced.
    """
    wrapped = app_maker()

    def run_wsgi(env, respond):
        return wrapped(env, respond)

    return run_wsgi

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # hgweb/request.py - An http request from either CGI or the standalone server.
             #
             # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
             # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import wsgiref.headers as wsgiheaders
             #import wsgiref.validate
             from ..thirdparty import (
                 attr,
             )
             from .. import (
                 error,
                 pycompat,
                 util,
             )
             class multidict(object):
                 """A dict-like object that can store multiple values for a key.

                 Used to store parsed request parameters. Every key maps to a list of
                 values kept in the order they were added; plain item access returns
                 the most recently added value.

                 This is inspired by WebOb's class of the same name.
                 """

                 def __init__(self):
                     # Maps each key to the list of values recorded for it.
                     self._items = {}

                 def __getitem__(self, key):
                     """Return the last set value for ``key``.

                     Raises ``KeyError`` if the key is not present.
                     """
                     return self._items[key][-1]

                 def __setitem__(self, key, value):
                     """Replace all values for ``key`` with the single ``value``."""
                     self._items[key] = [value]

                 def __delitem__(self, key):
                     """Delete all values for ``key``.

                     Raises ``KeyError`` if the key is not present.
                     """
                     del self._items[key]

                 def __contains__(self, key):
                     """Return True if at least one value was stored for ``key``."""
                     return key in self._items

                 def __len__(self):
                     """Return the number of distinct keys (not the number of values)."""
                     return len(self._items)

                 def get(self, key, default=None):
                     """Return the last value for ``key``, or ``default`` if missing."""
                     try:
                         return self[key]
                     except KeyError:
                         return default

                 def add(self, key, value):
                     """Add a new value for a key. Does not replace existing values."""
                     self._items.setdefault(key, []).append(value)

                 def getall(self, key):
                     """Obtain all values for a key.

                     Returns an empty list when the key is not present. For a present
                     key the internal list itself is returned, so callers should not
                     mutate it.
                     """
                     return self._items.get(key, [])

                 def getone(self, key):
                     """Obtain a single value for a key.

                     Raises KeyError if key not defined or it has multiple values set.
                     """
                     vals = self._items[key]
                     if len(vals) > 1:
                         raise KeyError('multiple values for %r' % key)
                     return vals[0]

                 def asdictoflists(self):
                     """Return a new dict mapping each key to a copy of its value list."""
                     # ``items()`` exists on both Python 2 and Python 3 dicts, whereas
                     # ``iteritems()`` is Python 2 only.
                     return {k: list(v) for k, v in self._items.items()}
             @attr.s(frozen=True)
             class parsedrequest(object):
                 """Represents a parsed WSGI request.

                 Contains both parsed parameters as well as a handle on the input stream.

                 The class is declared with ``attr.s(frozen=True)``; every attribute
                 below is an ``attr.ib()`` field. The order of the declarations is part
                 of the generated constructor's interface, so do not reorder them.
                 """
                 # Request method.
                 method = attr.ib()
                 # Full URL for this request.
                 url = attr.ib()
                 # URL without any path components. Just <proto>://<host><port>.
                 baseurl = attr.ib()
                 # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
                 # of HTTP: Host header for hostname. This is likely what clients used.
                 advertisedurl = attr.ib()
                 # Advertised counterpart of ``baseurl`` (no path components).
                 advertisedbaseurl = attr.ib()
                 # URL scheme (part before ``://``). e.g. ``http`` or ``https``.
                 urlscheme = attr.ib()
                 # Value of REMOTE_USER, if set, or None.
                 remoteuser = attr.ib()
                 # Value of REMOTE_HOST, if set, or None.
                 remotehost = attr.ib()
                 # Relative WSGI application path. If defined, will begin with a
                 # ``/``.
                 apppath = attr.ib()
                 # List of path parts to be used for dispatch.
                 dispatchparts = attr.ib()
                 # URL path component (no query string) used for dispatch. Can be
                 # ``None`` to signal no path component given to the request, an
                 # empty string to signal a request to the application's root URL,
                 # or a string not beginning with ``/`` containing the requested
                 # path under the application.
                 dispatchpath = attr.ib()
                 # The name of the repository being accessed.
                 reponame = attr.ib()
                 # Raw query string (part after "?" in URL).
                 querystring = attr.ib()
                 # multidict of query string parameters.
                 qsparams = attr.ib()
                 # wsgiref.headers.Headers instance. Operates like a dict with case
                 # insensitive keys.
                 headers = attr.ib()
                 # Request body input stream.
                 bodyfh = attr.ib()
                 # WSGI environment dict, unmodified.
                 rawenv = attr.ib()
             def parserequestfromenv(env, reponame=None, altbaseurl=None):
                 """Parse URL components from environment variables.

                 WSGI defines request attributes via environment variables. This function
                 parses the environment variables into a data structure.

                 If ``reponame`` is defined, the leading path components matching that
                 string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
                 This simulates the world view of a WSGI application that processes
                 requests from the base URL of a repo.

                 If ``altbaseurl`` (typically comes from ``web.baseurl`` config option)
                 is defined, it is used - instead of the WSGI environment variables - for
                 constructing URL components up to and including the WSGI application path.
                 For example, if the current WSGI application is at ``/repo`` and a request
                 is made to ``/rev/@`` with this argument set to
                 ``http://myserver:9000/prefix``, the URL and path components will resolve as
                 if the request were to ``http://myserver:9000/prefix/rev/@``. In other
                 words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and
                 ``SCRIPT_NAME`` are all effectively replaced by components from this URL.

                 Returns a ``parsedrequest`` instance.

                 Raises ``error.ProgrammingError`` if ``reponame`` is given and
                 ``PATH_INFO`` is missing/empty, does not begin with the repo name, or
                 the repo name prefix does not end at a path delimiter.

                 ``wsgi.url_scheme``, ``REQUEST_METHOD`` and ``wsgi.input`` are read with
                 direct indexing, as are ``SERVER_NAME`` and ``SERVER_PORT`` on the code
                 paths that need them, so they must be present in ``env``.
                 """
                 # PEP 3333 defines the WSGI spec and is a useful reference for this code.
                 # We first validate that the incoming object conforms with the WSGI spec.
                 # We only want to be dealing with spec-conforming WSGI implementations.
                 # TODO enable this once we fix internal violations.
                 #wsgiref.validate.check_environ(env)
                 # PEP-0333 states that environment keys and values are native strings
                 # (bytes on Python 2 and str on Python 3). The code points for the Unicode
                 # strings on Python 3 must be between \00000-\000FF. We deal with bytes
                 # in Mercurial, so mass convert string keys and values to bytes.
                 if pycompat.ispy3:
                     # First pass encodes the keys, second pass encodes str values and
                     # leaves non-str values (e.g. the input stream) untouched.
                     env = {k.encode('latin-1'): v for k, v in env.iteritems()}
                     env = {k: v.encode('latin-1') if isinstance(v, str) else v
                            for k, v in env.iteritems()}
                 if altbaseurl:
                     altbaseurl = util.url(altbaseurl)
                 # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
                 # the environment variables.
                 # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
                 # how URLs are reconstructed.
                 fullurl = env['wsgi.url_scheme'] + '://'
                 if altbaseurl and altbaseurl.scheme:
                     advertisedfullurl = altbaseurl.scheme + '://'
                 else:
                     advertisedfullurl = fullurl
                 def addport(s, port):
                     # Append ``:<port>`` to ``s`` unless ``port`` is the default for
                     # the scheme ``s`` starts with: 443 for https, 80 otherwise.
                     if s.startswith('https://'):
                         if port != '443':
                             s += ':' + port
                     else:
                         if port != '80':
                             s += ':' + port
                     return s
                 # A non-empty Host header wins and is used verbatim; otherwise fall
                 # back to SERVER_NAME plus any non-default SERVER_PORT.
                 if env.get('HTTP_HOST'):
                     fullurl += env['HTTP_HOST']
                 else:
                     fullurl += env['SERVER_NAME']
                     fullurl = addport(fullurl, env['SERVER_PORT'])
                 if altbaseurl and altbaseurl.host:
                     advertisedfullurl += altbaseurl.host
                     # Pick the port: explicit port from altbaseurl, else the scheme
                     # default for http/https, else SERVER_PORT for any other scheme.
                     if altbaseurl.port:
                         port = altbaseurl.port
                     elif altbaseurl.scheme == 'http' and not altbaseurl.port:
                         port = '80'
                     elif altbaseurl.scheme == 'https' and not altbaseurl.port:
                         port = '443'
                     else:
                         port = env['SERVER_PORT']
                     advertisedfullurl = addport(advertisedfullurl, port)
                 else:
                     advertisedfullurl += env['SERVER_NAME']
                     advertisedfullurl = addport(advertisedfullurl, env['SERVER_PORT'])
                 # Snapshot the scheme/host/port portion before any path is appended.
                 baseurl = fullurl
                 advertisedbaseurl = advertisedfullurl
                 fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
                 fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
                 if altbaseurl:
                     # The path of altbaseurl stands in for SCRIPT_NAME; make sure it
                     # is rooted at ``/`` when non-empty.
                     path = altbaseurl.path or ''
                     if path and not path.startswith('/'):
                         path = '/' + path
                     advertisedfullurl += util.urlreq.quote(path)
                 else:
                     advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
                 advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
                 # The query string is appended as-is (it is not passed through quote()).
                 if env.get('QUERY_STRING'):
                     fullurl += '?' + env['QUERY_STRING']
                     advertisedfullurl += '?' + env['QUERY_STRING']
                 # If ``reponame`` is defined, that must be a prefix on PATH_INFO
                 # that represents the repository being dispatched to. When computing
                 # the dispatch info, we ignore these leading path components.
                 if altbaseurl:
                     apppath = altbaseurl.path or ''
                     if apppath and not apppath.startswith('/'):
                         apppath = '/' + apppath
                 else:
                     apppath = env.get('SCRIPT_NAME', '')
                 if reponame:
                     repoprefix = '/' + reponame.strip('/')
                     if not env.get('PATH_INFO'):
                         raise error.ProgrammingError('reponame requires PATH_INFO')
                     if not env['PATH_INFO'].startswith(repoprefix):
                         raise error.ProgrammingError('PATH_INFO does not begin with repo '
                                                      'name: %s (%s)' % (env['PATH_INFO'],
                                                                         reponame))
                     dispatchpath = env['PATH_INFO'][len(repoprefix):]
                     # Guard against a partial component match, e.g. repo ``foo``
                     # against a PATH_INFO of ``/foobar``.
                     if dispatchpath and not dispatchpath.startswith('/'):
                         raise error.ProgrammingError('reponame prefix of PATH_INFO does '
                                                      'not end at path delimiter: %s (%s)' %
                                                      (env['PATH_INFO'], reponame))
                     # The repo prefix moves from the dispatch path to the app path.
                     apppath = apppath.rstrip('/') + repoprefix
                     dispatchparts = dispatchpath.strip('/').split('/')
                     dispatchpath = '/'.join(dispatchparts)
                 elif 'PATH_INFO' in env:
                     if env['PATH_INFO'].strip('/'):
                         dispatchparts = env['PATH_INFO'].strip('/').split('/')
                         dispatchpath = '/'.join(dispatchparts)
                     else:
                         # PATH_INFO present but empty or only slashes: root request.
                         dispatchparts = []
                         dispatchpath = ''
                 else:
                     # No PATH_INFO at all: signal "no path component" with None.
                     dispatchparts = []
                     dispatchpath = None
                 querystring = env.get('QUERY_STRING', '')
                 # We store as a list so we have ordering information. We also store as
                 # a dict to facilitate fast lookup.
                 qsparams = multidict()
                 for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
                     qsparams.add(k, v)
                 # HTTP_* keys contain HTTP request headers. The Headers structure should
                 # perform case normalization for us. We just rewrite underscore to dash
                 # so keys match what likely went over the wire.
                 headers = []
                 for k, v in env.iteritems():
                     if k.startswith('HTTP_'):
                         headers.append((k[len('HTTP_'):].replace('_', '-'), v))
                 headers = wsgiheaders.Headers(headers)
                 # This is kind of a lie because the HTTP header wasn't explicitly
                 # sent. But for all intents and purposes it should be OK to lie about
                 # this, since a consumer will use either value to determine how many
                 # bytes are available to read.
                 if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
                     headers['Content-Length'] = env['CONTENT_LENGTH']
                 if 'CONTENT_TYPE' in env and 'HTTP_CONTENT_TYPE' not in env:
                     headers['Content-Type'] = env['CONTENT_TYPE']
                 bodyfh = env['wsgi.input']
                 # When the length is known, wrap the stream in util.cappedreader with
                 # that length. NOTE(review): presumably this stops reads at
                 # Content-Length bytes - confirm against util.cappedreader.
                 if 'Content-Length' in headers:
                     bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))
                 return parsedrequest(method=env['REQUEST_METHOD'],
                                      url=fullurl, baseurl=baseurl,
                                      advertisedurl=advertisedfullurl,
                                      advertisedbaseurl=advertisedbaseurl,
                                      urlscheme=env['wsgi.url_scheme'],
                                      remoteuser=env.get('REMOTE_USER'),
                                      remotehost=env.get('REMOTE_HOST'),
                                      apppath=apppath,
                                      dispatchparts=dispatchparts, dispatchpath=dispatchpath,
                                      reponame=reponame,
                                      querystring=querystring,
                                      qsparams=qsparams,
                                      headers=headers,
                                      bodyfh=bodyfh,
                                      rawenv=env)
             class offsettrackingwriter(object):
                 """Append-only, file-like wrapper around a write callable.

                 Each ``write()`` forwards the data to the bound callable and keeps a
                 running total of how much has been written, so ``tell()`` can be
                 answered.

                 This exists to wrap the ``write()`` function handed back by a WSGI
                 ``start_response()`` call: that object is a bare callable rather than
                 a file object, so it offers none of the other file methods.
                 """

                 def __init__(self, writefn):
                     self._offset = 0
                     self._write = writefn

                 def write(self, s):
                     """Forward ``s`` to the wrapped callable and advance the offset."""
                     written = self._write(s)
                     # A callable that does not report a count returns None; in that
                     # case assume all of ``s`` was written.
                     self._offset += len(s) if written is None else written

                 def flush(self):
                     """No-op; present only for file-object compatibility."""

                 def tell(self):
                     """Return the running total accumulated by ``write()``."""
                     return self._offset
             class wsgiresponse(object):
                 """Represents a response to a WSGI request.

                 A response consists of a status line, headers, and a body.

                 Consumers must populate the ``status`` and ``headers`` fields and
                 make a call to a ``setbody*()`` method before the response can be
                 issued.

                 When it is time to start sending the response over the wire,
                 ``sendresponse()`` is called. It handles emitting the header portion
                 of the response message. It then yields chunks of body data to be
                 written to the peer. Typically, the WSGI application itself calls
                 and returns the value from ``sendresponse()``.
                 """

                 def __init__(self, req, startresponse):
                     """Create an empty response tied to a specific request.

                     ``req`` is a ``parsedrequest``. ``startresponse`` is the
                     ``start_response`` function passed to the WSGI application.
                     """
                     self._req = req
                     self._startresponse = startresponse

                     self.status = None
                     self.headers = wsgiheaders.Headers([])

                     # Exactly one of these three is expected to be populated via a
                     # ``setbody*()`` call before ``sendresponse()`` runs.
                     self._bodybytes = None
                     self._bodygen = None
                     self._bodywillwrite = False

                     self._started = False
                     # Set by ``sendresponse()`` when the body is emitted via write().
                     self._bodywritefn = None

                 def _verifybody(self):
                     """Raise ``ProgrammingError`` if a body was already defined."""
                     if (self._bodybytes is not None or self._bodygen is not None
                         or self._bodywillwrite):
                         raise error.ProgrammingError('cannot define body multiple times')

                 def setbodybytes(self, b):
                     """Define the response body as static bytes.

                     The empty string signals that there is no response body.

                     Also sets the ``Content-Length`` header to ``len(b)``.
                     """
                     self._verifybody()
                     self._bodybytes = b
                     self.headers['Content-Length'] = '%d' % len(b)

                 def setbodygen(self, gen):
                     """Define the response body as a generator of bytes."""
                     self._verifybody()
                     self._bodygen = gen

                 def setbodywillwrite(self):
                     """Signal an intent to use write() to emit the response body.

                     **This is the least preferred way to send a body.**

                     It is preferred for WSGI applications to emit a generator of chunks
                     constituting the response body. However, some consumers can't emit
                     data this way. So, WSGI provides a way to obtain a ``write(data)``
                     function that can be used to synchronously perform an unbuffered
                     write.

                     Calling this function signals an intent to produce the body in this
                     manner.
                     """
                     self._verifybody()
                     self._bodywillwrite = True

                 def sendresponse(self):
                     """Send the generated response to the client.

                     Before this is called, ``status`` must be set and one of
                     ``setbodybytes()``, ``setbodygen()`` or ``setbodywillwrite()``
                     must be called.

                     This is a generator: none of the work below (including the
                     validation and the ``start_response`` call) happens until it is
                     advanced. It yields the body chunks, if any.

                     Calling this method multiple times is not allowed.

                     Raises ``error.ProgrammingError`` when the response is started
                     twice, is missing a status or body, or is an invalid 304 response.
                     """
                     if self._started:
                         raise error.ProgrammingError('sendresponse() called multiple times')

                     self._started = True

                     if not self.status:
                         raise error.ProgrammingError('status line not defined')

                     if (self._bodybytes is None and self._bodygen is None
                         and not self._bodywillwrite):
                         raise error.ProgrammingError('response body not defined')

                     # RFC 7232 Section 4.1 states that a 304 MUST generate one of
                     # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
                     # and SHOULD NOT generate other headers unless they could be used
                     # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
                     # states that no response body can be issued. Content-Length can
                     # be sent. But if it is present, it should be the size of the response
                     # that wasn't transferred.
                     if self.status.startswith('304 '):
                         # setbodybytes('') will set C-L to 0. This doesn't conform with the
                         # spec. So remove it.
                         if self.headers.get('Content-Length') == '0':
                             del self.headers['Content-Length']

                         # Strictly speaking, this is too strict. But until it causes
                         # problems, let's be strict.
                         badheaders = {k for k in self.headers.keys()
                                       if k.lower() not in ('date', 'etag', 'expires',
                                                            'cache-control',
                                                            'content-location',
                                                            'vary')}
                         if badheaders:
                             raise error.ProgrammingError(
                                 'illegal header on 304 response: %s' %
                                 ', '.join(sorted(badheaders)))

                         if self._bodygen is not None or self._bodywillwrite:
                             raise error.ProgrammingError("must use setbodybytes('') with "
                                                          "304 responses")

                     # Various HTTP clients (notably httplib) won't read the HTTP response
                     # until the HTTP request has been sent in full. If servers (us) send a
                     # response before the HTTP request has been fully sent, the connection
                     # may deadlock because neither end is reading.
                     #
                     # We work around this by "draining" the request data before
                     # sending any response in some conditions.
                     drain = False
                     close = False

                     # If the client sent Expect: 100-continue, we assume it is smart enough
                     # to deal with the server sending a response before reading the request.
                     # (httplib doesn't do this.)
                     if self._req.headers.get('Expect', '').lower() == '100-continue':
                         pass
                     # Only tend to request methods that have bodies. Strictly speaking,
                     # we should sniff for a body. But this is fine for our existing
                     # WSGI applications.
                     elif self._req.method not in ('POST', 'PUT'):
                         pass
                     else:
                         # If we don't know how much data to read, there's no guarantee
                         # that we can drain the request responsibly. The WSGI
                         # specification only says that servers *should* ensure the
                         # input stream doesn't overrun the actual request. So there's
                         # no guarantee that reading until EOF won't corrupt the stream
                         # state.
                         if not isinstance(self._req.bodyfh, util.cappedreader):
                             close = True
                         else:
                             # We /could/ only drain certain HTTP response codes. But 200 and
                             # non-200 wire protocol responses both require draining. Since
                             # we have a capped reader in place for all situations where we
                             # drain, it is safe to read from that stream. We'll either do
                             # a drain or no-op if we're already at EOF.
                             drain = True

                     if close:
                         self.headers['Connection'] = 'Close'

                     if drain:
                         assert isinstance(self._req.bodyfh, util.cappedreader)
                         while True:
                             chunk = self._req.bodyfh.read(32768)
                             if not chunk:
                                 break

                     # Convert header names and values with pycompat.strurl before
                     # handing them to start_response. NOTE(review): presumably this
                     # produces the native string type WSGI requires - confirm against
                     # pycompat.
                     strheaders = [(pycompat.strurl(k), pycompat.strurl(v)) for
                                   k, v in self.headers.items()]
                     write = self._startresponse(pycompat.sysstr(self.status),
                                                 strheaders)

                     # Dispatch on which body mode was configured. Tests are against
                     # ``None`` so that an empty bytes body (used for 304 and other
                     # body-less responses) is handled explicitly as "yield nothing"
                     # rather than falling through to the error branch.
                     if self._bodybytes is not None:
                         if self._bodybytes:
                             yield self._bodybytes
                     elif self._bodygen is not None:
                         for chunk in self._bodygen:
                             yield chunk
                     elif self._bodywillwrite:
                         self._bodywritefn = write
                     else:
                         # Defensive: unreachable given the "response body not defined"
                         # check above, but fail loudly if that invariant ever breaks.
                         raise error.ProgrammingError('do not know how to send body')

                 def getbodyfile(self):
                     """Obtain a file object like object representing the response body.

                     For this to work, you must call ``setbodywillwrite()`` and then
                     ``sendresponse()`` first. ``sendresponse()`` is a generator and the
                     function won't run to completion unless the generator is advanced. The
                     generator yields no items. The easiest way to consume it is with
                     ``list(res.sendresponse())``, which should resolve to an empty list -
                     ``[]``.

                     Returns an ``offsettrackingwriter`` wrapping the WSGI ``write()``
                     callable. Raises ``error.ProgrammingError`` if the prerequisites
                     above were not met.
                     """
                     if not self._bodywillwrite:
                         raise error.ProgrammingError('must call setbodywillwrite() first')

                     if not self._started:
                         raise error.ProgrammingError('must call sendresponse() first; did '
                                                      'you remember to consume it since it '
                                                      'is a generator?')

                     assert self._bodywritefn
                     return offsettrackingwriter(self._bodywritefn)
             def wsgiapplication(app_maker):
                 """Build a WSGI callable from an application factory.

                 Kept for compatibility with old CGI scripts. A plain hgweb() or
                 hgwebdir() can and should now be used as a WSGI application.

                 ``app_maker`` is called once, immediately; the returned function
                 forwards every ``(env, respond)`` call to the object it produced.
                 """
                 app = app_maker()

                 def run_wsgi(env, respond):
                     # Pure pass-through to the application created above.
                     result = app(env, respond)
                     return result

                 return run_wsgi