upstream/mercurial-mirror Commit - r37843:e82b137a

1

# hgweb/request.py - An http request from either CGI or the standalone server.

1

# hgweb/request.py - An http request from either CGI or the standalone server.

2

#

2

#

3

4

5

#

5

#

6

# This software may be used and distributed according to the terms of the

6

# This software may be used and distributed according to the terms of the

7

# GNU General Public License version 2 or any later version.

7

# GNU General Public License version 2 or any later version.

8

9

from __future__ import absolute_import

9

from __future__ import absolute_import

10

11

#import wsgiref.validate

11

#import wsgiref.validate

12

13

from ..thirdparty import (

13

from ..thirdparty import (

14

attr,

14

attr,

15

)

15

)

16

from .. import (

16

from .. import (

17

error,

17

error,

18

pycompat,

18

pycompat,

19

util,

19

util,

20

)

20

)

21

22

class multidict(object):
    """A dict like object that can store multiple values for a key.

    Used to store parsed request parameters.

    This is inspired by WebOb's class of the same name.

    Internally each key maps to a non-empty list of values, kept in the
    order in which they were added.
    """
    def __init__(self):
        self._items = {}

    def __getitem__(self, key):
        """Returns the last set value for a key.

        Raises KeyError if the key is not defined.
        """
        return self._items[key][-1]

    def __setitem__(self, key, value):
        """Replace all values for a key with a single new value."""
        self._items[key] = [value]

    def __delitem__(self, key):
        """Delete all values for a key.

        Raises KeyError if the key is not defined.
        """
        del self._items[key]

    def __contains__(self, key):
        return key in self._items

    def __len__(self):
        """Returns the number of distinct keys (not the number of values)."""
        return len(self._items)

    def get(self, key, default=None):
        """Returns the last set value for a key, or ``default`` if unset."""
        try:
            return self.__getitem__(key)
        except KeyError:
            return default

    def add(self, key, value):
        """Add a new value for a key. Does not replace existing values."""
        self._items.setdefault(key, []).append(value)

    def getall(self, key):
        """Obtains all values for a key.

        Returns an empty list if the key is not defined. For a defined key
        the internally stored list is returned as-is (not a copy).
        """
        return self._items.get(key, [])

    def getone(self, key):
        """Obtain a single value for a key.

        Raises KeyError if key not defined or it has multiple values set.
        """
        vals = self._items[key]

        if len(vals) > 1:
            raise KeyError('multiple values for %r' % key)

        return vals[0]

    def asdictoflists(self):
        """Returns a new dict mapping each key to a copy of its value list."""
        # ``items()`` exists on both Python 2 and Python 3 dicts, whereas
        # ``iteritems()`` is Python 2 only.
        return {k: list(v) for k, v in self._items.items()}

78

79

@attr.s(frozen=True)
class parsedrequest(object):
    """Represents a parsed WSGI request.

    Contains both parsed parameters as well as a handle on the input stream.

    The class is declared with ``frozen=True``; every field is supplied
    when the instance is constructed.
    """

    # Request method.
    method = attr.ib()
    # Full URL for this request.
    url = attr.ib()
    # URL without any path components. Just <proto>://<host><port>.
    baseurl = attr.ib()
    # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
    # of HTTP: Host header for hostname. This is likely what clients used.
    advertisedurl = attr.ib()
    advertisedbaseurl = attr.ib()
    # URL scheme (part before ``://``). e.g. ``http`` or ``https``.
    urlscheme = attr.ib()
    # Value of REMOTE_USER, if set, or None.
    remoteuser = attr.ib()
    # Value of REMOTE_HOST, if set, or None.
    remotehost = attr.ib()
    # Relative WSGI application path. If defined, will begin with a
    # ``/``.
    apppath = attr.ib()
    # List of path parts to be used for dispatch.
    dispatchparts = attr.ib()
    # URL path component (no query string) used for dispatch. Can be
    # ``None`` to signal no path component given to the request, an
    # empty string to signal a request to the application's root URL,
    # or a string not beginning with ``/`` containing the requested
    # path under the application.
    dispatchpath = attr.ib()
    # The name of the repository being accessed.
    reponame = attr.ib()
    # Raw query string (part after "?" in URL).
    querystring = attr.ib()
    # multidict of query string parameters.
    qsparams = attr.ib()
    # wsgiref.headers.Headers instance. Operates like a dict with case
    # insensitive keys.
    headers = attr.ib()
    # Request body input stream.
    bodyfh = attr.ib()
    # WSGI environment dict, unmodified.
    rawenv = attr.ib()

126

127

def parserequestfromenv(env, reponame=None, altbaseurl=None, bodyfh=None):
    """Parse URL components from environment variables.

    WSGI defines request attributes via environment variables. This function
    parses the environment variables into a data structure.

    If ``reponame`` is defined, the leading path components matching that
    string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
    This simulates the world view of a WSGI application that processes
    requests from the base URL of a repo.

    If ``altbaseurl`` (typically comes from ``web.baseurl`` config option)
    is defined, it is used - instead of the WSGI environment variables - for
    constructing URL components up to and including the WSGI application path.
    For example, if the current WSGI application is at ``/repo`` and a request
    is made to ``/rev/@`` with this argument set to
    ``http://myserver:9000/prefix``, the URL and path components will resolve as
    if the request were to ``http://myserver:9000/prefix/rev/@``. In other
    words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and
    ``SCRIPT_NAME`` are all effectively replaced by components from this URL.

    ``bodyfh`` can be used to specify a file object to read the request body
    from. If not defined, ``wsgi.input`` from the environment dict is used.

    Returns a ``parsedrequest`` instance.

    Raises ``error.ProgrammingError`` if a repository name is in effect and
    ``PATH_INFO`` is empty or missing, does not start with the repository
    name, or continues past the repository name without a ``/`` delimiter.
    """
    # PEP 3333 defines the WSGI spec and is a useful reference for this code.

    # We first validate that the incoming object conforms with the WSGI spec.
    # We only want to be dealing with spec-conforming WSGI implementations.
    # TODO enable this once we fix internal violations.
    #wsgiref.validate.check_environ(env)

    # PEP-0333 states that environment keys and values are native strings
    # (bytes on Python 2 and str on Python 3). The code points for the Unicode
    # strings on Python 3 must be between \00000-\000FF. We deal with bytes
    # in Mercurial, so mass convert string keys and values to bytes.
    if pycompat.ispy3:
        # ``items()`` is used because Python 3 dicts have no ``iteritems()``.
        env = {k.encode('latin-1'): v for k, v in env.items()}
        env = {k: v.encode('latin-1') if isinstance(v, str) else v
               for k, v in env.items()}

    # Some hosting solutions are emulating hgwebdir, and dispatching directly
    # to an hgweb instance using this environment variable.  This was always
    # checked prior to d7fd203e36cc; keep doing so to avoid breaking them.
    if not reponame:
        reponame = env.get('REPO_NAME')

    if altbaseurl:
        altbaseurl = util.url(altbaseurl)

    # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
    # the environment variables.
    # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
    # how URLs are reconstructed.
    fullurl = env['wsgi.url_scheme'] + '://'

    if altbaseurl and altbaseurl.scheme:
        advertisedfullurl = altbaseurl.scheme + '://'
    else:
        advertisedfullurl = fullurl

    def addport(s, port):
        # Append ``:<port>`` unless it is the default port for the scheme
        # that ``s`` starts with.
        if s.startswith('https://'):
            if port != '443':
                s += ':' + port
        else:
            if port != '80':
                s += ':' + port

        return s

    if env.get('HTTP_HOST'):
        # The Host header is used verbatim; no port is appended to it here.
        fullurl += env['HTTP_HOST']
    else:
        fullurl += env['SERVER_NAME']
        fullurl = addport(fullurl, env['SERVER_PORT'])

    if altbaseurl and altbaseurl.host:
        advertisedfullurl += altbaseurl.host

        # Prefer an explicit port from the alternate base URL, then the
        # default port of its scheme, then the server's own port.
        if altbaseurl.port:
            port = altbaseurl.port
        elif altbaseurl.scheme == 'http':
            port = '80'
        elif altbaseurl.scheme == 'https':
            port = '443'
        else:
            port = env['SERVER_PORT']

        advertisedfullurl = addport(advertisedfullurl, port)
    else:
        advertisedfullurl += env['SERVER_NAME']
        advertisedfullurl = addport(advertisedfullurl, env['SERVER_PORT'])

    baseurl = fullurl
    advertisedbaseurl = advertisedfullurl

    fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
    fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))

    # The application path comes from the alternate base URL when one is
    # given, otherwise from SCRIPT_NAME. It is computed once and used both
    # for the advertised URL and as the starting value of ``apppath``.
    if altbaseurl:
        apppath = altbaseurl.path or ''
        if apppath and not apppath.startswith('/'):
            apppath = '/' + apppath
    else:
        apppath = env.get('SCRIPT_NAME', '')

    advertisedfullurl += util.urlreq.quote(apppath)
    advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))

    if env.get('QUERY_STRING'):
        fullurl += '?' + env['QUERY_STRING']
        advertisedfullurl += '?' + env['QUERY_STRING']

    # If ``reponame`` is defined, that must be a prefix on PATH_INFO
    # that represents the repository being dispatched to. When computing
    # the dispatch info, we ignore these leading path components.

    if reponame:
        repoprefix = '/' + reponame.strip('/')

        if not env.get('PATH_INFO'):
            raise error.ProgrammingError('reponame requires PATH_INFO')

        if not env['PATH_INFO'].startswith(repoprefix):
            raise error.ProgrammingError('PATH_INFO does not begin with repo '
                                         'name: %s (%s)' % (env['PATH_INFO'],
                                                            reponame))

        dispatchpath = env['PATH_INFO'][len(repoprefix):]

        if dispatchpath and not dispatchpath.startswith('/'):
            raise error.ProgrammingError('reponame prefix of PATH_INFO does '
                                         'not end at path delimiter: %s (%s)' %
                                         (env['PATH_INFO'], reponame))

        apppath = apppath.rstrip('/') + repoprefix
        dispatchparts = dispatchpath.strip('/').split('/')
        dispatchpath = '/'.join(dispatchparts)

    elif 'PATH_INFO' in env:
        if env['PATH_INFO'].strip('/'):
            dispatchparts = env['PATH_INFO'].strip('/').split('/')
            dispatchpath = '/'.join(dispatchparts)
        else:
            dispatchparts = []
            dispatchpath = ''
    else:
        dispatchparts = []
        dispatchpath = None

    querystring = env.get('QUERY_STRING', '')

    # Parameters go into a multidict so that a key repeated in the query
    # string keeps all of its values, in the order they were given.
    qsparams = multidict()
    for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
        qsparams.add(k, v)

    # HTTP_* keys contain HTTP request headers. The Headers structure should
    # perform case normalization for us. We just rewrite underscore to dash
    # so keys match what likely went over the wire.
    headers = []
    for k, v in env.items():
        if k.startswith('HTTP_'):
            headers.append((k[len('HTTP_'):].replace('_', '-'), v))

    from . import wsgiheaders # avoid cycle
    headers = wsgiheaders.Headers(headers)

    # This is kind of a lie because the HTTP header wasn't explicitly
    # sent. But for all intents and purposes it should be OK to lie about
    # this, since a consumer will use either value to determine how many
    # bytes are available to read.
    if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
        headers['Content-Length'] = env['CONTENT_LENGTH']

    if 'CONTENT_TYPE' in env and 'HTTP_CONTENT_TYPE' not in env:
        headers['Content-Type'] = env['CONTENT_TYPE']

    if bodyfh is None:
        bodyfh = env['wsgi.input']
        if 'Content-Length' in headers:
            # An empty Content-Length value is treated as a length of 0.
            bodyfh = util.cappedreader(bodyfh,
                                       int(headers['Content-Length'] or '0'))

    return parsedrequest(method=env['REQUEST_METHOD'],
                         url=fullurl, baseurl=baseurl,
                         advertisedurl=advertisedfullurl,
                         advertisedbaseurl=advertisedbaseurl,
                         urlscheme=env['wsgi.url_scheme'],
                         remoteuser=env.get('REMOTE_USER'),
                         remotehost=env.get('REMOTE_HOST'),
                         apppath=apppath,
                         dispatchparts=dispatchparts, dispatchpath=dispatchpath,
                         reponame=reponame,
                         querystring=querystring,
                         qsparams=qsparams,
                         headers=headers,
                         bodyfh=bodyfh,
                         rawenv=env)

333

334

class offsettrackingwriter(object):
    """Append-only, file-like wrapper that counts what has been written.

    An instance is bound to a callable, which is invoked with the data on
    every ``write()``. The running total of written data is kept so that
    ``tell()`` can be answered.

    The intent of this class is to wrap the ``write()`` function returned by
    a WSGI ``start_response()`` function. Since ``write()`` is a callable and
    not a file object, it doesn't implement other file object methods.
    """
    def __init__(self, writefn):
        self._offset = 0
        self._write = writefn

    def write(self, s):
        written = self._write(s)
        # Some Python objects don't report the number of bytes written; in
        # that case count the full length of ``s``.
        self._offset += len(s) if written is None else written

    def flush(self):
        pass

    def tell(self):
        return self._offset

364

365

class wsgiresponse(object):

366

class wsgiresponse(object):

366

"""Represents a response to a WSGI request.

367

"""Represents a response to a WSGI request.

367

368

A response consists of a status line, headers, and a body.

369

A response consists of a status line, headers, and a body.

369

370

Consumers must populate the ``status`` and ``headers`` fields and

371

Consumers must populate the ``status`` and ``headers`` fields and

371

make a call to a ``setbody*()`` method before the response can be

372

make a call to a ``setbody*()`` method before the response can be

372

issued.

373

issued.

373

374

When it is time to start sending the response over the wire,

375

When it is time to start sending the response over the wire,

375

``sendresponse()`` is called. It handles emitting the header portion

376

``sendresponse()`` is called. It handles emitting the header portion

376

of the response message. It then yields chunks of body data to be

377

of the response message. It then yields chunks of body data to be

377

written to the peer. Typically, the WSGI application itself calls

378

written to the peer. Typically, the WSGI application itself calls

378

and returns the value from ``sendresponse()``.

379

and returns the value from ``sendresponse()``.

379

"""

380

"""

380

381

def __init__(self, req, startresponse):

382

def __init__(self, req, startresponse):

382

"""Create an empty response tied to a specific request.

383

"""Create an empty response tied to a specific request.

383

384

``req`` is a ``parsedrequest``. ``startresponse`` is the

385

``req`` is a ``parsedrequest``. ``startresponse`` is the

385

``start_response`` function passed to the WSGI application.

386

``start_response`` function passed to the WSGI application.

386

"""

387

"""

387

self._req = req

388

self._req = req

388

self._startresponse = startresponse

389

self._startresponse = startresponse

389

390

self.status = None

391

self.status = None

391

from . import wsgiheaders # avoid cycle

392

from . import wsgiheaders # avoid cycle

392

self.headers = wsgiheaders.Headers([])

393

self.headers = wsgiheaders.Headers([])

393

394

self._bodybytes = None

395

self._bodybytes = None

395

self._bodygen = None

396

self._bodygen = None

396

self._bodywillwrite = False

397

self._bodywillwrite = False

397

self._started = False

398

self._started = False

398

self._bodywritefn = None

399

self._bodywritefn = None

399

400

def _verifybody(self):

401

def _verifybody(self):

401

if (self._bodybytes is not None or self._bodygen is not None

402

if (self._bodybytes is not None or self._bodygen is not None

402

or self._bodywillwrite):

403

or self._bodywillwrite):

403

raise error.ProgrammingError('cannot define body multiple times')

404

raise error.ProgrammingError('cannot define body multiple times')

404

405

def setbodybytes(self, b):

406

def setbodybytes(self, b):

406

"""Define the response body as static bytes.

407

"""Define the response body as static bytes.

407

408

The empty string signals that there is no response body.

409

The empty string signals that there is no response body.

409

"""

410

"""

410

self._verifybody()

411

self._verifybody()

411

self._bodybytes = b

412

self._bodybytes = b

412

self.headers['Content-Length'] = '%d' % len(b)

413

self.headers['Content-Length'] = '%d' % len(b)

413

414

def setbodygen(self, gen):

415

def setbodygen(self, gen):

415

"""Define the response body as a generator of bytes."""

416

"""Define the response body as a generator of bytes."""

416

self._verifybody()

417

self._verifybody()

417

self._bodygen = gen

418

self._bodygen = gen

418

419

def setbodywillwrite(self):

420

def setbodywillwrite(self):

420

"""Signal an intent to use write() to emit the response body.

421

"""Signal an intent to use write() to emit the response body.

421

422

**This is the least preferred way to send a body.**

423

**This is the least preferred way to send a body.**

423

424

It is preferred for WSGI applications to emit a generator of chunks

425

It is preferred for WSGI applications to emit a generator of chunks

425

constituting the response body. However, some consumers can't emit

426

constituting the response body. However, some consumers can't emit

426

data this way. So, WSGI provides a way to obtain a ``write(data)``

427

data this way. So, WSGI provides a way to obtain a ``write(data)``

427

function that can be used to synchronously perform an unbuffered

428

function that can be used to synchronously perform an unbuffered

428

write.

429

write.

429

430

Calling this function signals an intent to produce the body in this

431

Calling this function signals an intent to produce the body in this

431

manner.

432

manner.

432

"""

433

"""

433

self._verifybody()

434

self._verifybody()

434

self._bodywillwrite = True

435

self._bodywillwrite = True

435

436

def sendresponse(self):
    """Send the generated response to the client.

    Before this is called, ``status`` must be set and one of
    ``setbodybytes()``, ``setbodygen()`` or ``setbodywillwrite()`` must
    have been called.

    This is a generator: nothing below runs until it is advanced. It
    yields the body chunks for ``setbodybytes()``/``setbodygen()`` bodies
    and yields nothing for a ``setbodywillwrite()`` body, in which case
    the ``write`` callable returned by the start-response function is
    stored in ``self._bodywritefn``.

    Calling this method multiple times is not allowed.

    Raises ``error.ProgrammingError`` if it was already started, if the
    status or the body is undefined, or if a 304 response carries a
    disallowed header or a non-bytes body.
    """
    if self._started:
        raise error.ProgrammingError('sendresponse() called multiple times')

    self._started = True

    if not self.status:
        raise error.ProgrammingError('status line not defined')

    if (self._bodybytes is None and self._bodygen is None
        and not self._bodywillwrite):
        raise error.ProgrammingError('response body not defined')

    # RFC 7232 Section 4.1 states that a 304 MUST generate one of
    # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
    # and SHOULD NOT generate other headers unless they could be used
    # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
    # states that no response body can be issued. Content-Length can
    # be sent. But if it is present, it should be the size of the response
    # that wasn't transferred.
    if self.status.startswith('304 '):
        # setbodybytes('') will set C-L to 0. This doesn't conform with the
        # spec. So remove it.
        if self.headers.get('Content-Length') == '0':
            del self.headers['Content-Length']

        # Strictly speaking, this is too strict. But until it causes
        # problems, let's be strict.
        badheaders = {k for k in self.headers.keys()
                      if k.lower() not in ('date', 'etag', 'expires',
                                           'cache-control',
                                           'content-location',
                                           'vary')}
        if badheaders:
            raise error.ProgrammingError(
                'illegal header on 304 response: %s' %
                ', '.join(sorted(badheaders)))

        if self._bodygen is not None or self._bodywillwrite:
            raise error.ProgrammingError("must use setbodybytes('') with "
                                         "304 responses")

    # Various HTTP clients (notably httplib) won't read the HTTP response
    # until the HTTP request has been sent in full. If servers (us) send a
    # response before the HTTP request has been fully sent, the connection
    # may deadlock because neither end is reading.
    #
    # We work around this by "draining" the request data before
    # sending any response in some conditions.
    drain = False
    close = False

    # If the client sent Expect: 100-continue, we assume it is smart enough
    # to deal with the server sending a response before reading the request.
    # (httplib doesn't do this.)
    if self._req.headers.get('Expect', '').lower() == '100-continue':
        pass
    # Only tend to request methods that have bodies. Strictly speaking,
    # we should sniff for a body. But this is fine for our existing
    # WSGI applications.
    elif self._req.method not in ('POST', 'PUT'):
        pass
    else:
        # If we don't know how much data to read, there's no guarantee
        # that we can drain the request responsibly. The WSGI
        # specification only says that servers *should* ensure the
        # input stream doesn't overrun the actual request. So there's
        # no guarantee that reading until EOF won't corrupt the stream
        # state.
        if not isinstance(self._req.bodyfh, util.cappedreader):
            close = True
        else:
            # We /could/ only drain certain HTTP response codes. But 200 and
            # non-200 wire protocol responses both require draining. Since
            # we have a capped reader in place for all situations where we
            # drain, it is safe to read from that stream. We'll either do
            # a drain or no-op if we're already at EOF.
            drain = True

    if close:
        self.headers['Connection'] = 'Close'

    if drain:
        assert isinstance(self._req.bodyfh, util.cappedreader)
        # Read and discard the remaining request body in 32 KiB chunks.
        while True:
            chunk = self._req.bodyfh.read(32768)
            if not chunk:
                break

    strheaders = [(pycompat.strurl(k), pycompat.strurl(v)) for
                  k, v in self.headers.items()]
    write = self._startresponse(pycompat.sysstr(self.status),
                                strheaders)

    if self._bodybytes:
        yield self._bodybytes
    elif self._bodygen:
        for chunk in self._bodygen:
            yield chunk
    elif self._bodywillwrite:
        self._bodywritefn = write
    else:
        # Reached when a body was defined but is empty (for example after
        # setbodybytes('')): there is nothing to emit. Only fail when no
        # body source is defined at all, and actually raise in that case
        # instead of constructing the exception and discarding it.
        if self._bodybytes is None and self._bodygen is None:
            raise error.ProgrammingError('do not know how to send body')

547

def getbodyfile(self):
    """Obtain a file-like object representing the response body.

    For this to work, you must call ``setbodywillwrite()`` and then
    ``sendresponse()`` first. ``sendresponse()`` is a generator and the
    function won't run to completion unless the generator is advanced. The
    generator yields no items. The easiest way to consume it is with
    ``list(res.sendresponse())``, which should resolve to an empty list -
    ``[]``.

    Returns an ``offsettrackingwriter`` wrapping the stored write function.

    Raises ``error.ProgrammingError`` if ``setbodywillwrite()`` was not
    called or if ``sendresponse()`` has not started running.
    """
    if not self._bodywillwrite:
        raise error.ProgrammingError('must call setbodywillwrite() first')

    if not self._started:
        raise error.ProgrammingError('must call sendresponse() first; did '
                                     'you remember to consume it since it '
                                     'is a generator?')

    # sendresponse() stores the write callable only once it has run to
    # the end, so this guards against a partially advanced generator.
    assert self._bodywritefn
    return offsettrackingwriter(self._bodywritefn)

568

def wsgiapplication(app_maker):
    """For compatibility with old CGI scripts. A plain hgweb() or hgwebdir()
    can and should now be used as a WSGI application.

    ``app_maker`` is called once, immediately, with no arguments. The
    returned callable forwards ``(env, respond)`` to the application it
    produced and returns that application's result.
    """
    application = app_maker()

    def run_wsgi(env, respond):
        return application(env, respond)

    return run_wsgi

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # hgweb/request.py - An http request from either CGI or the standalone server.
             #
             # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
             # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             #import wsgiref.validate
             from ..thirdparty import (
                 attr,
             )
             from .. import (
                 error,
                 pycompat,
                 util,
             )
             class multidict(object):
                 """A dict like object that can store multiple values for a key.

                 Used to store parsed request parameters.

                 This is inspired by WebOb's class of the same name.

                 Values are kept per key in insertion order inside ``self._items``,
                 a plain dict mapping each key to a list of values.
                 """
                 def __init__(self):
                     # key -> list of values, oldest first.
                     self._items = {}

                 def __getitem__(self, key):
                     """Returns the last set value for a key.

                     Raises ``KeyError`` if the key is not defined.
                     """
                     return self._items[key][-1]

                 def __setitem__(self, key, value):
                     """Replace all values for a key with a single new value."""
                     self._items[key] = [value]

                 def __delitem__(self, key):
                     """Delete all values for a key."""
                     del self._items[key]

                 def __contains__(self, key):
                     return key in self._items

                 def __len__(self):
                     """Return the number of distinct keys (not the number of values)."""
                     return len(self._items)

                 def get(self, key, default=None):
                     """Return the last set value for a key, or ``default`` if unset."""
                     try:
                         return self.__getitem__(key)
                     except KeyError:
                         return default

                 def add(self, key, value):
                     """Add a new value for a key. Does not replace existing values."""
                     self._items.setdefault(key, []).append(value)

                 def getall(self, key):
                     """Obtains all values for a key.

                     Returns an empty list if the key is not defined. When the key is
                     defined, the list stored internally is returned as-is (not a copy).
                     """
                     return self._items.get(key, [])

                 def getone(self, key):
                     """Obtain a single value for a key.

                     Raises KeyError if key not defined or it has multiple values set.
                     """
                     vals = self._items[key]
                     if len(vals) > 1:
                         raise KeyError('multiple values for %r' % key)
                     return vals[0]

                 def asdictoflists(self):
                     """Return a new dict mapping each key to a copy of its value list."""
                     # ``items()`` exists on both Python 2 and Python 3 dicts, whereas
                     # ``iteritems()`` is Python 2 only.
                     return {k: list(v) for k, v in self._items.items()}
             @attr.s(frozen=True)
             class parsedrequest(object):
                 """Represents a parsed WSGI request.

                 Contains both parsed parameters as well as a handle on the input stream.

                 Instances are built by ``parserequestfromenv()``, which supplies every
                 attribute below as a keyword argument. The class is declared with
                 ``attr.s(frozen=True)``, so instances are presumably immutable once
                 constructed (attrs semantics - confirm against the vendored copy).
                 """
                 # Request method.
                 method = attr.ib()
                 # Full URL for this request.
                 url = attr.ib()
                 # URL without any path components. Just <proto>://<host><port>.
                 baseurl = attr.ib()
                 # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
                 # of the HTTP Host header for the hostname (or the components of
                 # ``altbaseurl`` when one is given to ``parserequestfromenv()``). This is
                 # likely what clients used.
                 advertisedurl = attr.ib()
                 # Advertised counterpart of ``baseurl``: no path components.
                 advertisedbaseurl = attr.ib()
                 # URL scheme (part before ``://``). e.g. ``http`` or ``https``.
                 urlscheme = attr.ib()
                 # Value of REMOTE_USER, if set, or None.
                 remoteuser = attr.ib()
                 # Value of REMOTE_HOST, if set, or None.
                 remotehost = attr.ib()
                 # Relative WSGI application path. If defined, will begin with a
                 # ``/``.
                 apppath = attr.ib()
                 # List of path parts to be used for dispatch.
                 dispatchparts = attr.ib()
                 # URL path component (no query string) used for dispatch. Can be
                 # ``None`` to signal no path component given to the request, an
                 # empty string to signal a request to the application's root URL,
                 # or a string not beginning with ``/`` containing the requested
                 # path under the application.
                 dispatchpath = attr.ib()
                 # The name of the repository being accessed.
                 reponame = attr.ib()
                 # Raw query string (part after "?" in URL).
                 querystring = attr.ib()
                 # multidict of query string parameters.
                 qsparams = attr.ib()
                 # wsgiref.headers.Headers instance. Operates like a dict with case
                 # insensitive keys.
                 headers = attr.ib()
                 # Request body input stream.
                 bodyfh = attr.ib()
                 # WSGI environment dict, unmodified.
                 rawenv = attr.ib()
             def parserequestfromenv(env, reponame=None, altbaseurl=None, bodyfh=None):
                 """Parse URL components from environment variables.

                 WSGI defines request attributes via environment variables. This function
                 parses the environment variables into a data structure.

                 If ``reponame`` is defined, the leading path components matching that
                 string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
                 This simulates the world view of a WSGI application that processes
                 requests from the base URL of a repo.

                 If ``altbaseurl`` (typically comes from ``web.baseurl`` config option)
                 is defined, it is used - instead of the WSGI environment variables - for
                 constructing URL components up to and including the WSGI application path.
                 For example, if the current WSGI application is at ``/repo`` and a request
                 is made to ``/rev/@`` with this argument set to
                 ``http://myserver:9000/prefix``, the URL and path components will resolve as
                 if the request were to ``http://myserver:9000/prefix/rev/@``. In other
                 words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and
                 ``SCRIPT_NAME`` are all effectively replaced by components from this URL.

                 ``bodyfh`` can be used to specify a file object to read the request body
                 from. If not defined, ``wsgi.input`` from the environment dict is used.

                 Returns a ``parsedrequest`` instance.

                 Raises ``error.ProgrammingError`` when a repository name is in effect
                 (from ``reponame`` or the ``REPO_NAME`` environment key) and
                 ``PATH_INFO`` is missing or empty, does not start with that name, or
                 continues past the name without a ``/`` delimiter.
                 """
                 # PEP 3333 defines the WSGI spec and is a useful reference for this code.
                 # We first validate that the incoming object conforms with the WSGI spec.
                 # We only want to be dealing with spec-conforming WSGI implementations.
                 # TODO enable this once we fix internal violations.
                 #wsgiref.validate.check_environ(env)
                 # PEP-0333 states that environment keys and values are native strings
                 # (bytes on Python 2 and str on Python 3). The code points for the Unicode
                 # strings on Python 3 must be between \00000-\000FF. We deal with bytes
                 # in Mercurial, so mass convert string keys and values to bytes.
                 # NOTE(review): ``iteritems()`` is not a Python 3 dict method; presumably
                 # the project's Python 3 module loader rewrites it - confirm.
                 if pycompat.ispy3:
                     env = {k.encode('latin-1'): v for k, v in env.iteritems()}
                     env = {k: v.encode('latin-1') if isinstance(v, str) else v
                            for k, v in env.iteritems()}
                 # Some hosting solutions are emulating hgwebdir, and dispatching directly
                 # to an hgweb instance using this environment variable.  This was always
                 # checked prior to d7fd203e36cc; keep doing so to avoid breaking them.
                 if not reponame:
                     reponame = env.get('REPO_NAME')
                 # From here on ``altbaseurl`` is a parsed ``util.url`` object (or still
                 # falsy when no alternate base URL was given).
                 if altbaseurl:
                     altbaseurl = util.url(altbaseurl)
                 # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
                 # the environment variables.
                 # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
                 # how URLs are reconstructed.
                 fullurl = env['wsgi.url_scheme'] + '://'
                 if altbaseurl and altbaseurl.scheme:
                     advertisedfullurl = altbaseurl.scheme + '://'
                 else:
                     advertisedfullurl = fullurl
                 # Append ``:<port>`` to ``s`` unless the port is the default one for the
                 # scheme: 443 when ``s`` starts with ``https://``, 80 otherwise.
                 def addport(s, port):
                     if s.startswith('https://'):
                         if port != '443':
                             s += ':' + port
                     else:
                         if port != '80':
                             s += ':' + port
                     return s
                 # The real URL prefers the Host header; SERVER_NAME/SERVER_PORT are only
                 # used when HTTP_HOST is absent or empty.
                 if env.get('HTTP_HOST'):
                     fullurl += env['HTTP_HOST']
                 else:
                     fullurl += env['SERVER_NAME']
                     fullurl = addport(fullurl, env['SERVER_PORT'])
                 # The advertised URL takes host and port from ``altbaseurl`` when it
                 # names a host. Without an explicit port, the scheme default is assumed
                 # for http/https and SERVER_PORT is used for any other scheme.
                 if altbaseurl and altbaseurl.host:
                     advertisedfullurl += altbaseurl.host
                     if altbaseurl.port:
                         port = altbaseurl.port
                     elif altbaseurl.scheme == 'http' and not altbaseurl.port:
                         port = '80'
                     elif altbaseurl.scheme == 'https' and not altbaseurl.port:
                         port = '443'
                     else:
                         port = env['SERVER_PORT']
                     advertisedfullurl = addport(advertisedfullurl, port)
                 else:
                     advertisedfullurl += env['SERVER_NAME']
                     advertisedfullurl = addport(advertisedfullurl, env['SERVER_PORT'])
                 # Snapshot ``<scheme>://<host>[:<port>]`` before path components are
                 # appended to the full URLs.
                 baseurl = fullurl
                 advertisedbaseurl = advertisedfullurl
                 fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
                 fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
                 # For the advertised URL, the path of ``altbaseurl`` (normalized to start
                 # with ``/``) stands in for SCRIPT_NAME.
                 if altbaseurl:
                     path = altbaseurl.path or ''
                     if path and not path.startswith('/'):
                         path = '/' + path
                     advertisedfullurl += util.urlreq.quote(path)
                 else:
                     advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
                 advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
                 if env.get('QUERY_STRING'):
                     fullurl += '?' + env['QUERY_STRING']
                     advertisedfullurl += '?' + env['QUERY_STRING']
                 # If ``reponame`` is defined, that must be a prefix on PATH_INFO
                 # that represents the repository being dispatched to. When computing
                 # the dispatch info, we ignore these leading path components.
                 if altbaseurl:
                     apppath = altbaseurl.path or ''
                     if apppath and not apppath.startswith('/'):
                         apppath = '/' + apppath
                 else:
                     apppath = env.get('SCRIPT_NAME', '')
                 if reponame:
                     repoprefix = '/' + reponame.strip('/')
                     if not env.get('PATH_INFO'):
                         raise error.ProgrammingError('reponame requires PATH_INFO')
                     if not env['PATH_INFO'].startswith(repoprefix):
                         raise error.ProgrammingError('PATH_INFO does not begin with repo '
                                                      'name: %s (%s)' % (env['PATH_INFO'],
                                                                         reponame))
                     dispatchpath = env['PATH_INFO'][len(repoprefix):]
                     if dispatchpath and not dispatchpath.startswith('/'):
                         raise error.ProgrammingError('reponame prefix of PATH_INFO does '
                                                      'not end at path delimiter: %s (%s)' %
                                                      (env['PATH_INFO'], reponame))
                     # The repo name moves from the dispatch path onto the app path.
                     apppath = apppath.rstrip('/') + repoprefix
                     # NOTE: when nothing follows the repo prefix, ``split('/')`` on the
                     # empty string yields ``['']`` here, whereas the branch below
                     # produces ``[]`` for an empty path.
                     dispatchparts = dispatchpath.strip('/').split('/')
                     dispatchpath = '/'.join(dispatchparts)
                 elif 'PATH_INFO' in env:
                     if env['PATH_INFO'].strip('/'):
                         dispatchparts = env['PATH_INFO'].strip('/').split('/')
                         dispatchpath = '/'.join(dispatchparts)
                     else:
                         dispatchparts = []
                         dispatchpath = ''
                 else:
                     # No PATH_INFO at all: ``None`` distinguishes this from a request to
                     # the application root (empty string).
                     dispatchparts = []
                     dispatchpath = None
                 querystring = env.get('QUERY_STRING', '')
                 # We store as a list so we have ordering information. We also store as
                 # a dict to facilitate fast lookup.
                 qsparams = multidict()
                 for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
                     qsparams.add(k, v)
                 # HTTP_* keys contain HTTP request headers. The Headers structure should
                 # perform case normalization for us. We just rewrite underscore to dash
                 # so keys match what likely went over the wire.
                 headers = []
                 for k, v in env.iteritems():
                     if k.startswith('HTTP_'):
                         headers.append((k[len('HTTP_'):].replace('_', '-'), v))
                 from . import wsgiheaders # avoid cycle
                 headers = wsgiheaders.Headers(headers)
                 # This is kind of a lie because the HTTP header wasn't explicitly
                 # sent. But for all intents and purposes it should be OK to lie about
                 # this, since a consumer will use either value to determine how many
                 # bytes are available to read.
                 if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
                     headers['Content-Length'] = env['CONTENT_LENGTH']
                 if 'CONTENT_TYPE' in env and 'HTTP_CONTENT_TYPE' not in env:
                     headers['Content-Type'] = env['CONTENT_TYPE']
                 # Only the default ``wsgi.input`` stream is capped; a caller-supplied
                 # ``bodyfh`` is used as given.
                 if bodyfh is None:
                     bodyfh = env['wsgi.input']
                     if 'Content-Length' in headers:
                         # Cap reads at the declared length. On the added ("+") lines an
                         # empty Content-Length value falls back to '0' so that int()
                         # is never handed an empty string.
-                        bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))
+                        bodyfh = util.cappedreader(bodyfh,
+                                                   int(headers['Content-Length'] or '0'))
                 return parsedrequest(method=env['REQUEST_METHOD'],
                                      url=fullurl, baseurl=baseurl,
                                      advertisedurl=advertisedfullurl,
                                      advertisedbaseurl=advertisedbaseurl,
                                      urlscheme=env['wsgi.url_scheme'],
                                      remoteuser=env.get('REMOTE_USER'),
                                      remotehost=env.get('REMOTE_HOST'),
                                      apppath=apppath,
                                      dispatchparts=dispatchparts, dispatchpath=dispatchpath,
                                      reponame=reponame,
                                      querystring=querystring,
                                      qsparams=qsparams,
                                      headers=headers,
                                      bodyfh=bodyfh,
                                      rawenv=env)
             class offsettrackingwriter(object):
                 """Append-only file-like wrapper that counts what has been written.

                 Each instance wraps a callable. Every ``write()`` forwards its data
                 to that callable and adds to a running total, which ``tell()``
                 reports.

                 This exists to wrap the ``write()`` callable returned by a WSGI
                 ``start_response()`` function: that callable is not a file object,
                 so it lacks the other file object methods.
                 """
                 def __init__(self, writefn):
                     self._offset = 0
                     self._write = writefn

                 def write(self, s):
                     """Forward ``s`` to the wrapped callable and advance the offset."""
                     written = self._write(s)
                     # Not every writer reports a byte count; fall back to the length
                     # of the data when the callable returns None.
                     self._offset += len(s) if written is None else written

                 def flush(self):
                     """Do nothing; present only for file object compatibility."""
                     pass

                 def tell(self):
                     """Return the total counted so far."""
                     return self._offset
             class wsgiresponse(object):
                 """Represents a response to a WSGI request.

                 A response consists of a status line, headers, and a body.

                 Consumers must populate the ``status`` and ``headers`` fields and
                 make a call to a ``setbody*()`` method before the response can be
                 issued.

                 When it is time to start sending the response over the wire,
                 ``sendresponse()`` is called. It handles emitting the header portion
                 of the response message. It then yields chunks of body data to be
                 written to the peer. Typically, the WSGI application itself calls
                 and returns the value from ``sendresponse()``.
                 """

                 def __init__(self, req, startresponse):
                     """Create an empty response tied to a specific request.

                     ``req`` is a ``parsedrequest``. ``startresponse`` is the
                     ``start_response`` function passed to the WSGI application.
                     """
                     self._req = req
                     self._startresponse = startresponse

                     self.status = None
                     from . import wsgiheaders # avoid cycle
                     self.headers = wsgiheaders.Headers([])

                     # Exactly one of these three is set by a ``setbody*()`` call.
                     self._bodybytes = None
                     self._bodygen = None
                     self._bodywillwrite = False
                     # Set once ``sendresponse()`` starts executing.
                     self._started = False
                     # ``write()`` callable from ``start_response()``; only stored
                     # when the body is sent via ``setbodywillwrite()``.
                     self._bodywritefn = None

                 def _verifybody(self):
                     """Raise ``error.ProgrammingError`` if a body was already defined."""
                     if (self._bodybytes is not None or self._bodygen is not None
                         or self._bodywillwrite):
                         raise error.ProgrammingError('cannot define body multiple times')

                 def setbodybytes(self, b):
                     """Define the response body as static bytes.

                     The empty string signals that there is no response body.

                     Also sets the ``Content-Length`` header to the length of ``b``.
                     """
                     self._verifybody()
                     self._bodybytes = b
                     self.headers['Content-Length'] = '%d' % len(b)

                 def setbodygen(self, gen):
                     """Define the response body as a generator of bytes."""
                     self._verifybody()
                     self._bodygen = gen

                 def setbodywillwrite(self):
                     """Signal an intent to use write() to emit the response body.

                     **This is the least preferred way to send a body.**

                     It is preferred for WSGI applications to emit a generator of chunks
                     constituting the response body. However, some consumers can't emit
                     data this way. So, WSGI provides a way to obtain a ``write(data)``
                     function that can be used to synchronously perform an unbuffered
                     write.

                     Calling this function signals an intent to produce the body in this
                     manner.
                     """
                     self._verifybody()
                     self._bodywillwrite = True

                 def sendresponse(self):
                     """Send the generated response to the client.

                     Before this is called, ``status`` must be set and one of
                     ``setbodybytes()``, ``setbodygen()`` or ``setbodywillwrite()``
                     must be called.

                     Calling this method multiple times is not allowed.

                     This is a generator: nothing below runs (including the checks
                     that raise ``error.ProgrammingError``) until it is advanced. It
                     yields the chunks of the response body, which is no chunks at all
                     for an empty bytes body or a ``setbodywillwrite()`` body.
                     """
                     if self._started:
                         raise error.ProgrammingError('sendresponse() called multiple times')

                     self._started = True

                     if not self.status:
                         raise error.ProgrammingError('status line not defined')

                     if (self._bodybytes is None and self._bodygen is None
                         and not self._bodywillwrite):
                         raise error.ProgrammingError('response body not defined')

                     # RFC 7232 Section 4.1 states that a 304 MUST generate one of
                     # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
                     # and SHOULD NOT generate other headers unless they could be used
                     # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
                     # states that no response body can be issued. Content-Length can
                     # be sent. But if it is present, it should be the size of the response
                     # that wasn't transferred.
                     if self.status.startswith('304 '):
                         # setbodybytes('') will set C-L to 0. This doesn't conform with the
                         # spec. So remove it.
                         if self.headers.get('Content-Length') == '0':
                             del self.headers['Content-Length']

                         # Strictly speaking, this is too strict. But until it causes
                         # problems, let's be strict.
                         badheaders = {k for k in self.headers.keys()
                                       if k.lower() not in ('date', 'etag', 'expires',
                                                            'cache-control',
                                                            'content-location',
                                                            'vary')}
                         if badheaders:
                             raise error.ProgrammingError(
                                 'illegal header on 304 response: %s' %
                                 ', '.join(sorted(badheaders)))

                         if self._bodygen is not None or self._bodywillwrite:
                             raise error.ProgrammingError("must use setbodybytes('') with "
                                                          "304 responses")

                     # Various HTTP clients (notably httplib) won't read the HTTP response
                     # until the HTTP request has been sent in full. If servers (us) send a
                     # response before the HTTP request has been fully sent, the connection
                     # may deadlock because neither end is reading.
                     #
                     # We work around this by "draining" the request data before
                     # sending any response in some conditions.
                     drain = False
                     close = False

                     # If the client sent Expect: 100-continue, we assume it is smart enough
                     # to deal with the server sending a response before reading the request.
                     # (httplib doesn't do this.)
                     if self._req.headers.get('Expect', '').lower() == '100-continue':
                         pass
                     # Only tend to request methods that have bodies. Strictly speaking,
                     # we should sniff for a body. But this is fine for our existing
                     # WSGI applications.
                     elif self._req.method not in ('POST', 'PUT'):
                         pass
                     else:
                         # If we don't know how much data to read, there's no guarantee
                         # that we can drain the request responsibly. The WSGI
                         # specification only says that servers *should* ensure the
                         # input stream doesn't overrun the actual request. So there's
                         # no guarantee that reading until EOF won't corrupt the stream
                         # state.
                         if not isinstance(self._req.bodyfh, util.cappedreader):
                             close = True
                         else:
                             # We /could/ only drain certain HTTP response codes. But 200 and
                             # non-200 wire protocol responses both require draining. Since
                             # we have a capped reader in place for all situations where we
                             # drain, it is safe to read from that stream. We'll either do
                             # a drain or no-op if we're already at EOF.
                             drain = True

                     if close:
                         self.headers['Connection'] = 'Close'

                     if drain:
                         assert isinstance(self._req.bodyfh, util.cappedreader)
                         while True:
                             chunk = self._req.bodyfh.read(32768)
                             if not chunk:
                                 break

                     strheaders = [(pycompat.strurl(k), pycompat.strurl(v)) for
                                   k, v in self.headers.items()]
                     write = self._startresponse(pycompat.sysstr(self.status),
                                                 strheaders)

                     # Dispatch on which body kind was defined rather than on
                     # truthiness: an empty bytes body is a defined body that simply
                     # emits no chunks, and must not fall through to the error branch.
                     if self._bodybytes is not None:
                         if self._bodybytes:
                             yield self._bodybytes
                     elif self._bodygen is not None:
                         for chunk in self._bodygen:
                             yield chunk
                     elif self._bodywillwrite:
                         self._bodywritefn = write
                     else:
                         # Unreachable unless the body state was changed after the
                         # "response body not defined" check above.
                         raise error.ProgrammingError('do not know how to send body')

                 def getbodyfile(self):
                     """Obtain a file object like object representing the response body.

                     For this to work, you must call ``setbodywillwrite()`` and then
                     ``sendresponse()`` first. ``sendresponse()`` is a generator and the
                     function won't run to completion unless the generator is advanced. The
                     generator yields no items. The easiest way to consume it is with
                     ``list(res.sendresponse())``, which should resolve to an empty list -
                     ``[]``.

                     Returns an ``offsettrackingwriter`` wrapping the ``write()``
                     callable obtained from ``start_response()``. Raises
                     ``error.ProgrammingError`` if either prerequisite call is missing.
                     """
                     if not self._bodywillwrite:
                         raise error.ProgrammingError('must call setbodywillwrite() first')

                     if not self._started:
                         raise error.ProgrammingError('must call sendresponse() first; did '
                                                      'you remember to consume it since it '
                                                      'is a generator?')

                     assert self._bodywritefn
                     return offsettrackingwriter(self._bodywritefn)
             def wsgiapplication(app_maker):
                 """Build a WSGI callable from an application factory.

                 Kept for compatibility with old CGI scripts; a plain hgweb() or
                 hgwebdir() can and should now be used as a WSGI application.

                 ``app_maker`` is called once, immediately. The returned callable
                 forwards ``(env, respond)`` to the application it produced.
                 """
                 wsgiapp = app_maker()

                 def run_wsgi(env, respond):
                     result = wsgiapp(env, respond)
                     return result

                 return run_wsgi