upstream/mercurial-mirror Commit - r49760:7eebe563

1

# hgweb/request.py - An http request from either CGI or the standalone server.

1

# hgweb/request.py - An http request from either CGI or the standalone server.

2

#

2

#

3

4

5

#

5

#

6

# This software may be used and distributed according to the terms of the

6

# This software may be used and distributed according to the terms of the

7

# GNU General Public License version 2 or any later version.

7

# GNU General Public License version 2 or any later version.

8

9

10

# import wsgiref.validate

10

# import wsgiref.validate

11

12

from ..thirdparty import attr

12

from ..thirdparty import attr

13

from .. import (

13

from .. import (

14

encoding,

14

encoding,

15

error,

15

error,

16

pycompat,

16

pycompat,

17

util,

17

util,

18

)

18

)

19

from ..utils import (

19

from ..utils import (

20

urlutil,

20

urlutil,

21

)

21

)

22

23

24

class multidict(object):
    """A dict like object that can store multiple values for a key.

    Used to store parsed request parameters.

    This is inspired by WebOb's class of the same name.
    """

    def __init__(self):
        # Maps each key to the list of values recorded for it, in the
        # order they were added.
        self._items = {}

    def __getitem__(self, key):
        """Returns the last set value for a key."""
        return self._items[key][-1]

    def __setitem__(self, key, value):
        """Replace all values for a key with a new value."""
        self._items[key] = [value]

    def __delitem__(self, key):
        """Delete all values for a key."""
        del self._items[key]

    def __contains__(self, key):
        return key in self._items

    def __len__(self):
        # Number of distinct keys, not the total number of stored values.
        return len(self._items)

    def get(self, key, default=None):
        """Return the last set value for a key, or ``default`` if unset."""
        try:
            return self.__getitem__(key)
        except KeyError:
            return default

    def add(self, key, value):
        """Add a new value for a key. Does not replace existing values."""
        self._items.setdefault(key, []).append(value)

    def getall(self, key):
        """Obtains all values for a key."""
        return self._items.get(key, [])

    def getone(self, key):
        """Obtain a single value for a key.

        Raises KeyError if key not defined or it has multiple values set.
        """
        vals = self._items[key]

        if len(vals) > 1:
            raise KeyError(b'multiple values for %r' % key)

        return vals[0]

    def asdictoflists(self):
        """Return a plain dict mapping each key to a copy of its values."""
        return {k: list(v) for k, v in self._items.items()}

81

82

83

@attr.s(frozen=True)
class parsedrequest(object):
    """Represents a parsed WSGI request.

    Contains both parsed parameters as well as a handle on the input stream.
    """

    # Request method.
    method = attr.ib()
    # Full URL for this request.
    url = attr.ib()
    # URL without any path components. Just <proto>://<host><port>.
    baseurl = attr.ib()
    # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
    # of HTTP: Host header for hostname. This is likely what clients used.
    advertisedurl = attr.ib()
    advertisedbaseurl = attr.ib()
    # URL scheme (part before ``://``). e.g. ``http`` or ``https``.
    urlscheme = attr.ib()
    # Value of REMOTE_USER, if set, or None.
    remoteuser = attr.ib()
    # Value of REMOTE_HOST, if set, or None.
    remotehost = attr.ib()
    # Relative WSGI application path. If defined, will begin with a
    # ``/``.
    apppath = attr.ib()
    # List of path parts to be used for dispatch.
    dispatchparts = attr.ib()
    # URL path component (no query string) used for dispatch. Can be
    # ``None`` to signal no path component given to the request, an
    # empty string to signal a request to the application's root URL,
    # or a string not beginning with ``/`` containing the requested
    # path under the application.
    dispatchpath = attr.ib()
    # The name of the repository being accessed.
    reponame = attr.ib()
    # Raw query string (part after "?" in URL).
    querystring = attr.ib()
    # multidict of query string parameters.
    qsparams = attr.ib()
    # wsgiref.headers.Headers instance. Operates like a dict with case
    # insensitive keys.
    headers = attr.ib()
    # Request body input stream.
    bodyfh = attr.ib()
    # WSGI environment dict, unmodified.
    rawenv = attr.ib()

130

131

132

def parserequestfromenv(env, reponame=None, altbaseurl=None, bodyfh=None):
    """Parse URL components from environment variables.

    WSGI defines request attributes via environment variables. This function
    parses the environment variables into a data structure.

    If ``reponame`` is defined, the leading path components matching that
    string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
    This simulates the world view of a WSGI application that processes
    requests from the base URL of a repo.

    If ``altbaseurl`` (typically comes from ``web.baseurl`` config option)
    is defined, it is used - instead of the WSGI environment variables - for
    constructing URL components up to and including the WSGI application path.
    For example, if the current WSGI application is at ``/repo`` and a request
    is made to ``/rev/@`` with this argument set to
    ``http://myserver:9000/prefix``, the URL and path components will resolve as
    if the request were to ``http://myserver:9000/prefix/rev/@``. In other
    words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and
    ``SCRIPT_NAME`` are all effectively replaced by components from this URL.

    ``bodyfh`` can be used to specify a file object to read the request body
    from. If not defined, ``wsgi.input`` from the environment dict is used.

    Returns a ``parsedrequest`` instance.

    Raises ``error.ProgrammingError`` if a repo name is in effect and
    ``PATH_INFO`` is empty, does not begin with the repo name, or the repo
    name prefix does not end at a path delimiter.
    """
    # PEP 3333 defines the WSGI spec and is a useful reference for this code.

    # We first validate that the incoming object conforms with the WSGI spec.
    # We only want to be dealing with spec-conforming WSGI implementations.
    # TODO enable this once we fix internal violations.
    # wsgiref.validate.check_environ(env)

    # PEP-0333 states that environment keys and values are native strings.
    # The code points for the Unicode strings on Python 3 must be between
    # \00000-\000FF. We deal with bytes in Mercurial, so mass convert string
    # keys and values to bytes.
    def tobytes(s):
        # Non-str values (e.g. the ``wsgi.input`` stream) pass through as-is.
        if not isinstance(s, str):
            return s
        if pycompat.iswindows:
            # This is what mercurial.encoding does for os.environ on
            # Windows.
            return encoding.strtolocal(s)
        else:
            # This is what is documented to be used for os.environ on Unix.
            return pycompat.fsencode(s)

    env = {tobytes(k): tobytes(v) for k, v in env.items()}

    # Some hosting solutions are emulating hgwebdir, and dispatching directly
    # to an hgweb instance using this environment variable.  This was always
    # checked prior to d7fd203e36cc; keep doing so to avoid breaking them.
    if not reponame:
        reponame = env.get(b'REPO_NAME')

    if altbaseurl:
        altbaseurl = urlutil.url(altbaseurl)

    # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
    # the environment variables.
    # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
    # how URLs are reconstructed.
    fullurl = env[b'wsgi.url_scheme'] + b'://'

    if altbaseurl and altbaseurl.scheme:
        advertisedfullurl = altbaseurl.scheme + b'://'
    else:
        advertisedfullurl = fullurl

    def addport(s, port):
        # Only append the port when it is not the default for the scheme.
        if s.startswith(b'https://'):
            if port != b'443':
                s += b':' + port
        else:
            if port != b'80':
                s += b':' + port

        return s

    if env.get(b'HTTP_HOST'):
        fullurl += env[b'HTTP_HOST']
    else:
        fullurl += env[b'SERVER_NAME']
        fullurl = addport(fullurl, env[b'SERVER_PORT'])

    if altbaseurl and altbaseurl.host:
        advertisedfullurl += altbaseurl.host

        if altbaseurl.port:
            port = altbaseurl.port
        elif altbaseurl.scheme == b'http':
            port = b'80'
        elif altbaseurl.scheme == b'https':
            port = b'443'
        else:
            port = env[b'SERVER_PORT']

        advertisedfullurl = addport(advertisedfullurl, port)
    else:
        advertisedfullurl += env[b'SERVER_NAME']
        advertisedfullurl = addport(advertisedfullurl, env[b'SERVER_PORT'])

    # The base URLs are captured before any path component is appended.
    baseurl = fullurl
    advertisedbaseurl = advertisedfullurl

    fullurl += util.urlreq.quote(env.get(b'SCRIPT_NAME', b''))
    fullurl += util.urlreq.quote(env.get(b'PATH_INFO', b''))

    if altbaseurl:
        path = altbaseurl.path or b''
        if path and not path.startswith(b'/'):
            path = b'/' + path
        advertisedfullurl += util.urlreq.quote(path)
    else:
        advertisedfullurl += util.urlreq.quote(env.get(b'SCRIPT_NAME', b''))

    advertisedfullurl += util.urlreq.quote(env.get(b'PATH_INFO', b''))

    if env.get(b'QUERY_STRING'):
        fullurl += b'?' + env[b'QUERY_STRING']
        advertisedfullurl += b'?' + env[b'QUERY_STRING']

    # If ``reponame`` is defined, that must be a prefix on PATH_INFO
    # that represents the repository being dispatched to. When computing
    # the dispatch info, we ignore these leading path components.

    if altbaseurl:
        apppath = altbaseurl.path or b''
        if apppath and not apppath.startswith(b'/'):
            apppath = b'/' + apppath
    else:
        apppath = env.get(b'SCRIPT_NAME', b'')

    if reponame:
        repoprefix = b'/' + reponame.strip(b'/')

        if not env.get(b'PATH_INFO'):
            raise error.ProgrammingError(b'reponame requires PATH_INFO')

        if not env[b'PATH_INFO'].startswith(repoprefix):
            raise error.ProgrammingError(
                b'PATH_INFO does not begin with repo '
                b'name: %s (%s)' % (env[b'PATH_INFO'], reponame)
            )

        dispatchpath = env[b'PATH_INFO'][len(repoprefix) :]

        if dispatchpath and not dispatchpath.startswith(b'/'):
            raise error.ProgrammingError(
                b'reponame prefix of PATH_INFO does '
                b'not end at path delimiter: %s (%s)'
                % (env[b'PATH_INFO'], reponame)
            )

        apppath = apppath.rstrip(b'/') + repoprefix
        dispatchparts = dispatchpath.strip(b'/').split(b'/')
        dispatchpath = b'/'.join(dispatchparts)

    elif b'PATH_INFO' in env:
        if env[b'PATH_INFO'].strip(b'/'):
            dispatchparts = env[b'PATH_INFO'].strip(b'/').split(b'/')
            dispatchpath = b'/'.join(dispatchparts)
        else:
            dispatchparts = []
            dispatchpath = b''
    else:
        dispatchparts = []
        dispatchpath = None

    querystring = env.get(b'QUERY_STRING', b'')

    # We store as a list so we have ordering information. We also store as
    # a dict to facilitate fast lookup.
    qsparams = multidict()
    for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
        qsparams.add(k, v)

    # HTTP_* keys contain HTTP request headers. The Headers structure should
    # perform case normalization for us. We just rewrite underscore to dash
    # so keys match what likely went over the wire.
    headers = []
    for k, v in env.items():
        if k.startswith(b'HTTP_'):
            headers.append((k[len(b'HTTP_') :].replace(b'_', b'-'), v))

    from . import wsgiheaders  # avoid cycle

    headers = wsgiheaders.Headers(headers)

    # This is kind of a lie because the HTTP header wasn't explicitly
    # sent. But for all intents and purposes it should be OK to lie about
    # this, since a consumer will use either value to determine how many
    # bytes are available to read.
    if b'CONTENT_LENGTH' in env and b'HTTP_CONTENT_LENGTH' not in env:
        headers[b'Content-Length'] = env[b'CONTENT_LENGTH']

    if b'CONTENT_TYPE' in env and b'HTTP_CONTENT_TYPE' not in env:
        headers[b'Content-Type'] = env[b'CONTENT_TYPE']

    if bodyfh is None:
        bodyfh = env[b'wsgi.input']
        if b'Content-Length' in headers:
            # Cap reads at the declared length; an empty header value is
            # treated as a length of 0.
            bodyfh = util.cappedreader(
                bodyfh, int(headers[b'Content-Length'] or b'0')
            )

    return parsedrequest(
        method=env[b'REQUEST_METHOD'],
        url=fullurl,
        baseurl=baseurl,
        advertisedurl=advertisedfullurl,
        advertisedbaseurl=advertisedbaseurl,
        urlscheme=env[b'wsgi.url_scheme'],
        remoteuser=env.get(b'REMOTE_USER'),
        remotehost=env.get(b'REMOTE_HOST'),
        apppath=apppath,
        dispatchparts=dispatchparts,
        dispatchpath=dispatchpath,
        reponame=reponame,
        querystring=querystring,
        qsparams=qsparams,
        headers=headers,
        bodyfh=bodyfh,
        rawenv=env,
    )

358

356

359

357

360

class offsettrackingwriter(object):
    """A file object like object that is append only and tracks write count.

    Instances are bound to a callable. This callable is called with data
    whenever a ``write()`` is attempted.

    Instances track the amount of written data so they can answer ``tell()``
    requests.

    The intent of this class is to wrap the ``write()`` function returned by
    a WSGI ``start_response()`` function. Since ``write()`` is a callable and
    not a file object, it doesn't implement other file object methods.
    """

    def __init__(self, writefn):
        """Bind to ``writefn``, the callable invoked for every write."""
        self._write = writefn
        self._offset = 0

    def write(self, s):
        """Pass ``s`` to the bound callable and advance the offset."""
        res = self._write(s)
        # Some Python objects don't report the number of bytes written.
        if res is None:
            self._offset += len(s)
        else:
            self._offset += res

    def flush(self):
        """No-op; present so instances can be used where flush() is called."""
        pass

    def tell(self):
        """Return the running total accumulated by ``write()`` calls."""
        return self._offset

391

389

392

390

393

class wsgiresponse(object):

391

class wsgiresponse(object):

394

"""Represents a response to a WSGI request.

392

"""Represents a response to a WSGI request.

395

393

396

A response consists of a status line, headers, and a body.

394

A response consists of a status line, headers, and a body.

397

395

398

Consumers must populate the ``status`` and ``headers`` fields and

396

Consumers must populate the ``status`` and ``headers`` fields and

399

make a call to a ``setbody*()`` method before the response can be

397

make a call to a ``setbody*()`` method before the response can be

400

issued.

398

issued.

401

399

402

When it is time to start sending the response over the wire,

400

When it is time to start sending the response over the wire,

403

``sendresponse()`` is called. It handles emitting the header portion

401

``sendresponse()`` is called. It handles emitting the header portion

404

of the response message. It then yields chunks of body data to be

402

of the response message. It then yields chunks of body data to be

405

written to the peer. Typically, the WSGI application itself calls

403

written to the peer. Typically, the WSGI application itself calls

406

and returns the value from ``sendresponse()``.

404

and returns the value from ``sendresponse()``.

407

"""

405

"""

408

406

409

def __init__(self, req, startresponse):

407

def __init__(self, req, startresponse):

410

"""Create an empty response tied to a specific request.

408

"""Create an empty response tied to a specific request.

411

409

412

``req`` is a ``parsedrequest``. ``startresponse`` is the

410

``req`` is a ``parsedrequest``. ``startresponse`` is the

413

``start_response`` function passed to the WSGI application.

411

``start_response`` function passed to the WSGI application.

414

"""

412

"""

415

self._req = req

413

self._req = req

416

self._startresponse = startresponse

414

self._startresponse = startresponse

417

415

418

self.status = None

416

self.status = None

419

from . import wsgiheaders # avoid cycle

417

from . import wsgiheaders # avoid cycle

420

418

421

self.headers = wsgiheaders.Headers([])

419

self.headers = wsgiheaders.Headers([])

422

420

423

self._bodybytes = None

421

self._bodybytes = None

424

self._bodygen = None

422

self._bodygen = None

425

self._bodywillwrite = False

423

self._bodywillwrite = False

426

self._started = False

424

self._started = False

427

self._bodywritefn = None

425

self._bodywritefn = None

428

426

429

def _verifybody(self):

427

def _verifybody(self):

430

if (

428

if (

431

self._bodybytes is not None

429

self._bodybytes is not None

432

or self._bodygen is not None

430

or self._bodygen is not None

433

or self._bodywillwrite

431

or self._bodywillwrite

434

):

432

):

435

raise error.ProgrammingError(b'cannot define body multiple times')

433

raise error.ProgrammingError(b'cannot define body multiple times')

436

434

437

def setbodybytes(self, b):

435

def setbodybytes(self, b):

438

"""Define the response body as static bytes.

436

"""Define the response body as static bytes.

439

437

440

The empty string signals that there is no response body.

438

The empty string signals that there is no response body.

441

"""

439

"""

442

self._verifybody()

440

self._verifybody()

443

self._bodybytes = b

441

self._bodybytes = b

444

self.headers[b'Content-Length'] = b'%d' % len(b)

442

self.headers[b'Content-Length'] = b'%d' % len(b)

445

443

446

def setbodygen(self, gen):

444

def setbodygen(self, gen):

447

"""Define the response body as a generator of bytes."""

445

"""Define the response body as a generator of bytes."""

448

self._verifybody()

446

self._verifybody()

449

self._bodygen = gen

447

self._bodygen = gen

450

448

451

def setbodywillwrite(self):

449

def setbodywillwrite(self):

452

"""Signal an intent to use write() to emit the response body.

450

"""Signal an intent to use write() to emit the response body.

453

451

454

**This is the least preferred way to send a body.**

452

**This is the least preferred way to send a body.**

455

453

456

It is preferred for WSGI applications to emit a generator of chunks

454

It is preferred for WSGI applications to emit a generator of chunks

457

constituting the response body. However, some consumers can't emit

455

constituting the response body. However, some consumers can't emit

458

data this way. So, WSGI provides a way to obtain a ``write(data)``

456

data this way. So, WSGI provides a way to obtain a ``write(data)``

459

function that can be used to synchronously perform an unbuffered

457

function that can be used to synchronously perform an unbuffered

460

write.

458

write.

461

459

462

Calling this function signals an intent to produce the body in this

460

Calling this function signals an intent to produce the body in this

463

manner.

461

manner.

464

"""

462

"""

465

self._verifybody()

463

self._verifybody()

466

self._bodywillwrite = True

464

self._bodywillwrite = True

467

465

468

def sendresponse(self):
    """Send the generated response to the client.

    This is a generator, so none of the work below happens until it is
    advanced. Consuming it validates the response, drains the request
    body when needed, starts the response through
    ``self._startresponse`` and yields the body chunks. Nothing is
    yielded for an empty body or for a ``write()``-style body.

    Before this is called, ``status`` must be set and one of
    ``setbodybytes()``, ``setbodygen()`` or ``setbodywillwrite()`` must
    have been called.

    Calling this method multiple times is not allowed.

    Raises ``error.ProgrammingError`` when called again, when the status
    line or the body is not defined, or when a 304 response carries a
    disallowed header or a body that was not set via ``setbodybytes('')``.
    """
    if self._started:
        raise error.ProgrammingError(
            b'sendresponse() called multiple times'
        )

    self._started = True

    if not self.status:
        raise error.ProgrammingError(b'status line not defined')

    if (
        self._bodybytes is None
        and self._bodygen is None
        and not self._bodywillwrite
    ):
        raise error.ProgrammingError(b'response body not defined')

    # RFC 7232 Section 4.1 states that a 304 MUST generate one of
    # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
    # and SHOULD NOT generate other headers unless they could be used
    # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
    # states that no response body can be issued. Content-Length can
    # be sent. But if it is present, it should be the size of the response
    # that wasn't transferred.
    if self.status.startswith(b'304 '):
        # setbodybytes('') will set C-L to 0. This doesn't conform with the
        # spec. So remove it.
        if self.headers.get(b'Content-Length') == b'0':
            del self.headers[b'Content-Length']

        # Strictly speaking, this is too strict. But until it causes
        # problems, let's be strict.
        badheaders = {
            k
            for k in self.headers.keys()
            if k.lower()
            not in (
                b'date',
                b'etag',
                b'expires',
                b'cache-control',
                b'content-location',
                b'content-security-policy',
                b'vary',
            )
        }
        if badheaders:
            raise error.ProgrammingError(
                b'illegal header on 304 response: %s'
                % b', '.join(sorted(badheaders))
            )

        if self._bodygen is not None or self._bodywillwrite:
            raise error.ProgrammingError(
                b"must use setbodybytes('') with 304 responses"
            )

    # Various HTTP clients (notably httplib) won't read the HTTP response
    # until the HTTP request has been sent in full. If servers (us) send a
    # response before the HTTP request has been fully sent, the connection
    # may deadlock because neither end is reading.
    #
    # We work around this by "draining" the request data before
    # sending any response in some conditions.
    drain = False
    close = False

    # If the client sent Expect: 100-continue, we assume it is smart enough
    # to deal with the server sending a response before reading the request.
    # (httplib doesn't do this.)
    expectscontinue = (
        self._req.headers.get(b'Expect', b'').lower() == b'100-continue'
    )

    # Only tend to request methods that have bodies. Strictly speaking,
    # we should sniff for a body. But this is fine for our existing
    # WSGI applications.
    if not expectscontinue and self._req.method in (b'POST', b'PUT'):
        if isinstance(self._req.bodyfh, util.cappedreader):
            # We /could/ only drain certain HTTP response codes. But 200 and
            # non-200 wire protocol responses both require draining. Since
            # we have a capped reader in place for all situations where we
            # drain, it is safe to read from that stream. We'll either do
            # a drain or no-op if we're already at EOF.
            drain = True
        else:
            # If we don't know how much data to read, there's no guarantee
            # that we can drain the request responsibly. The WSGI
            # specification only says that servers *should* ensure the
            # input stream doesn't overrun the actual request. So there's
            # no guarantee that reading until EOF won't corrupt the stream
            # state.
            close = True

    if close:
        self.headers[b'Connection'] = b'Close'

    if drain:
        assert isinstance(self._req.bodyfh, util.cappedreader)
        # Read and discard the remaining request data in 32 KiB chunks.
        while True:
            chunk = self._req.bodyfh.read(32768)
            if not chunk:
                break

    strheaders = [
        (pycompat.strurl(k), pycompat.strurl(v))
        for k, v in self.headers.items()
    ]
    write = self._startresponse(pycompat.sysstr(self.status), strheaders)

    if self._bodybytes:
        yield self._bodybytes
    elif self._bodygen:
        for chunk in self._bodygen:
            # PEP-3333 says that output must be bytes. And some WSGI
            # implementations enforce this. We cast bytes-like types here
            # for convenience.
            if isinstance(chunk, bytearray):
                chunk = bytes(chunk)

            yield chunk
    elif self._bodywillwrite:
        self._bodywritefn = write
    # Otherwise the body was explicitly defined as empty (for example via
    # ``setbodybytes(b'')``, which 304 responses must use): there is
    # nothing to emit. A missing body definition was already rejected
    # above, so this case is not an error. The original code built an
    # ``error.ProgrammingError`` here and discarded it without raising;
    # that no-op has been dropped rather than turned into a ``raise``,
    # which would break valid empty responses.

601

599

602

def getbodyfile(self):
    """Return a file-like object used to write the response body.

    ``setbodywillwrite()`` and then ``sendresponse()`` have to be called
    beforehand. Because ``sendresponse()`` is a generator, its code does
    not run to completion until the generator is advanced. In this mode
    the generator yields no items, so the simplest way to consume it is
    ``list(res.sendresponse())``, which should evaluate to an empty
    list - ``[]``.
    """
    if not self._bodywillwrite:
        raise error.ProgrammingError(b'must call setbodywillwrite() first')

    if not self._started:
        raise error.ProgrammingError(
            b'must call sendresponse() first; '
            b'did you remember to consume it '
            b'since it is a generator?'
        )

    # Assigned by sendresponse() once the generator has been consumed.
    writefn = self._bodywritefn
    assert writefn
    return offsettrackingwriter(writefn)

624

622

625

623

626

def wsgiapplication(app_maker):
    """Compatibility shim for old CGI scripts.

    New code can and should use a plain hgweb() or hgwebdir() as the
    WSGI application directly.

    ``app_maker`` is called once, with no arguments, to build the
    application. The returned callable forwards ``(env, respond)`` to
    that application and returns its result.
    """
    wsgiapp = app_maker()

    def run_wsgi(env, respond):
        # Plain pass-through to the application built above.
        return wsgiapp(env, respond)

    return run_wsgi

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # hgweb/request.py - An http request from either CGI or the standalone server.
             #
             # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
             # Copyright 2005, 2006 Olivia Mackall <olivia@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             # import wsgiref.validate
             from ..thirdparty import attr
             from .. import (
                 encoding,
                 error,
                 pycompat,
                 util,
             )
             from ..utils import (
                 urlutil,
             )
             class multidict(object):
                 """Dict-like container that keeps every value stored under a key.

                 Holds parsed request parameters. Modelled on WebOb's class of the
                 same name.
                 """

                 def __init__(self):
                     # Maps each key to the list of values added for it, oldest first.
                     self._items = {}

                 def __getitem__(self, key):
                     """Return the most recently stored value for ``key``."""
                     values = self._items[key]
                     return values[-1]

                 def __setitem__(self, key, value):
                     """Drop any values held for ``key`` and store only ``value``."""
                     self._items[key] = [value]

                 def __delitem__(self, key):
                     """Remove ``key`` together with all of its values."""
                     del self._items[key]

                 def __contains__(self, key):
                     return key in self._items

                 def __len__(self):
                     return len(self._items)

                 def get(self, key, default=None):
                     """Return the last value for ``key``, or ``default`` if absent."""
                     try:
                         return self[key]
                     except KeyError:
                         return default

                 def add(self, key, value):
                     """Append ``value`` to the values of ``key``, keeping earlier ones."""
                     try:
                         bucket = self._items[key]
                     except KeyError:
                         bucket = self._items[key] = []
                     bucket.append(value)

                 def getall(self, key):
                     """Return the list of values for ``key`` (empty list if unknown)."""
                     try:
                         return self._items[key]
                     except KeyError:
                         return []

                 def getone(self, key):
                     """Return the sole value stored for ``key``.

                     Raises KeyError if the key is unknown or holds more than one value.
                     """
                     found = self._items[key]
                     if len(found) <= 1:
                         return found[0]
                     raise KeyError(b'multiple values for %r' % key)

                 def asdictoflists(self):
                     """Return a plain dict mapping each key to a copy of its value list."""
                     copied = {}
                     for name, values in pycompat.iteritems(self._items):
                         copied[name] = list(values)
                     return copied
             @attr.s(frozen=True)
             class parsedrequest(object):
                 """Represents a parsed WSGI request.

                 Contains both parsed parameters as well as a handle on the input stream.

                 Instances are produced by ``parserequestfromenv()``, which passes every
                 field by keyword. The class is declared ``frozen=True``.
                 """

                 # Request method, taken from ``REQUEST_METHOD``.
                 method = attr.ib()
                 # Full URL for this request, including the query string if present.
                 url = attr.ib()
                 # URL without any path components. Just <proto>://<host><port>.
                 baseurl = attr.ib()
                 # Advertised URL. Like ``url`` and ``baseurl`` but built from the
                 # ``altbaseurl`` components when those are given, and otherwise from
                 # SERVER_NAME/SERVER_PORT instead of the HTTP Host header. This is
                 # likely what clients used.
                 advertisedurl = attr.ib()
                 # Advertised counterpart of ``baseurl`` (no path components).
                 advertisedbaseurl = attr.ib()
                 # URL scheme (part before ``://``). e.g. ``http`` or ``https``.
                 urlscheme = attr.ib()
                 # Value of REMOTE_USER, if set, or None.
                 remoteuser = attr.ib()
                 # Value of REMOTE_HOST, if set, or None.
                 remotehost = attr.ib()
                 # Relative WSGI application path. If defined, will begin with a
                 # ``/``.
                 apppath = attr.ib()
                 # List of path parts to be used for dispatch.
                 dispatchparts = attr.ib()
                 # URL path component (no query string) used for dispatch. Can be
                 # ``None`` to signal no path component given to the request, an
                 # empty string to signal a request to the application's root URL,
                 # or a string not beginning with ``/`` containing the requested
                 # path under the application.
                 dispatchpath = attr.ib()
                 # The name of the repository being accessed (the ``reponame`` argument
                 # or, failing that, the ``REPO_NAME`` environment value).
                 reponame = attr.ib()
                 # Raw query string (part after "?" in URL).
                 querystring = attr.ib()
                 # multidict of query string parameters.
                 qsparams = attr.ib()
                 # ``wsgiheaders.Headers`` instance built from the ``HTTP_*`` environment
                 # keys (prefix stripped, underscores rewritten to dashes). Described as
                 # operating like a dict with case insensitive keys.
                 headers = attr.ib()
                 # Request body input stream.
                 bodyfh = attr.ib()
                 # WSGI environment dict as used by the parser, i.e. after ``str`` keys
                 # and values were converted to bytes; otherwise unmodified.
                 rawenv = attr.ib()
             def parserequestfromenv(env, reponame=None, altbaseurl=None, bodyfh=None):
                 """Parse URL components from environment variables.

                 WSGI defines request attributes via environment variables. This function
                 parses the environment variables into a data structure.

                 If ``reponame`` is defined, the leading path components matching that
                 string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
                 This simulates the world view of a WSGI application that processes
                 requests from the base URL of a repo.

                 If ``altbaseurl`` (typically comes from ``web.baseurl`` config option)
                 is defined, it is used - instead of the WSGI environment variables - for
                 constructing URL components up to and including the WSGI application path.
                 For example, if the current WSGI application is at ``/repo`` and a request
                 is made to ``/rev/@`` with this argument set to
                 ``http://myserver:9000/prefix``, the URL and path components will resolve as
                 if the request were to ``http://myserver:9000/prefix/rev/@``. In other
                 words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and
                 ``SCRIPT_NAME`` are all effectively replaced by components from this URL.

                 ``bodyfh`` can be used to specify a file object to read the request body
                 from. If not defined, ``wsgi.input`` from the environment dict is used.

                 Returns a ``parsedrequest``. Raises ``error.ProgrammingError`` when a
                 repository name is in effect but ``PATH_INFO`` is missing, does not start
                 with that name, or the name does not end at a path delimiter.
                 """
                 # PEP 3333 defines the WSGI spec and is a useful reference for this code.

                 # We first validate that the incoming object conforms with the WSGI spec.
                 # We only want to be dealing with spec-conforming WSGI implementations.
                 # TODO enable this once we fix internal violations.
                 # wsgiref.validate.check_environ(env)

                 # PEP-0333 states that environment keys and values are native strings.
                 # The code points for the Unicode strings on Python 3 must be between
                 # \00000-\000FF. We deal with bytes in Mercurial, so mass convert string
                 # keys and values to bytes.
                 def tobytes(s):
                     # Non-str values (e.g. the ``wsgi.input`` stream) pass through.
                     if not isinstance(s, str):
                         return s
                     if pycompat.iswindows:
                         # This is what mercurial.encoding does for os.environ on
                         # Windows.
                         return encoding.strtolocal(s)
                     else:
                         # This is what is documented to be used for os.environ on Unix.
                         return pycompat.fsencode(s)

                 env = {tobytes(k): tobytes(v) for k, v in pycompat.iteritems(env)}

                 # Some hosting solutions are emulating hgwebdir, and dispatching directly
                 # to an hgweb instance using this environment variable.  This was always
                 # checked prior to d7fd203e36cc; keep doing so to avoid breaking them.
                 if not reponame:
                     reponame = env.get(b'REPO_NAME')

                 if altbaseurl:
                     altbaseurl = urlutil.url(altbaseurl)

                 # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
                 # the environment variables.
                 # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
                 # how URLs are reconstructed.
                 fullurl = env[b'wsgi.url_scheme'] + b'://'

                 if altbaseurl and altbaseurl.scheme:
                     advertisedfullurl = altbaseurl.scheme + b'://'
                 else:
                     advertisedfullurl = fullurl

                 def addport(s, port):
                     # Append ``:port`` unless it is the default port for the scheme.
                     if s.startswith(b'https://'):
                         if port != b'443':
                             s += b':' + port
                     else:
                         if port != b'80':
                             s += b':' + port

                     return s

                 if env.get(b'HTTP_HOST'):
                     fullurl += env[b'HTTP_HOST']
                 else:
                     fullurl += env[b'SERVER_NAME']
                     fullurl = addport(fullurl, env[b'SERVER_PORT'])

                 if altbaseurl and altbaseurl.host:
                     advertisedfullurl += altbaseurl.host

                     # An explicit port wins; otherwise fall back to the default port
                     # of the alternate scheme, and finally to the server's own port.
                     if altbaseurl.port:
                         port = altbaseurl.port
                     elif altbaseurl.scheme == b'http':
                         port = b'80'
                     elif altbaseurl.scheme == b'https':
                         port = b'443'
                     else:
                         port = env[b'SERVER_PORT']

                     advertisedfullurl = addport(advertisedfullurl, port)
                 else:
                     advertisedfullurl += env[b'SERVER_NAME']
                     advertisedfullurl = addport(advertisedfullurl, env[b'SERVER_PORT'])

                 baseurl = fullurl
                 advertisedbaseurl = advertisedfullurl

                 fullurl += util.urlreq.quote(env.get(b'SCRIPT_NAME', b''))
                 fullurl += util.urlreq.quote(env.get(b'PATH_INFO', b''))

                 if altbaseurl:
                     path = altbaseurl.path or b''
                     if path and not path.startswith(b'/'):
                         path = b'/' + path
                     advertisedfullurl += util.urlreq.quote(path)
                 else:
                     advertisedfullurl += util.urlreq.quote(env.get(b'SCRIPT_NAME', b''))

                 advertisedfullurl += util.urlreq.quote(env.get(b'PATH_INFO', b''))

                 if env.get(b'QUERY_STRING'):
                     fullurl += b'?' + env[b'QUERY_STRING']
                     advertisedfullurl += b'?' + env[b'QUERY_STRING']

                 # If ``reponame`` is defined, that must be a prefix on PATH_INFO
                 # that represents the repository being dispatched to. When computing
                 # the dispatch info, we ignore these leading path components.

                 if altbaseurl:
                     apppath = altbaseurl.path or b''
                     if apppath and not apppath.startswith(b'/'):
                         apppath = b'/' + apppath
                 else:
                     apppath = env.get(b'SCRIPT_NAME', b'')

                 if reponame:
                     repoprefix = b'/' + reponame.strip(b'/')

                     if not env.get(b'PATH_INFO'):
                         raise error.ProgrammingError(b'reponame requires PATH_INFO')

                     if not env[b'PATH_INFO'].startswith(repoprefix):
                         raise error.ProgrammingError(
                             b'PATH_INFO does not begin with repo '
                             b'name: %s (%s)' % (env[b'PATH_INFO'], reponame)
                         )

                     dispatchpath = env[b'PATH_INFO'][len(repoprefix) :]

                     if dispatchpath and not dispatchpath.startswith(b'/'):
                         raise error.ProgrammingError(
                             b'reponame prefix of PATH_INFO does '
                             b'not end at path delimiter: %s (%s)'
                             % (env[b'PATH_INFO'], reponame)
                         )

                     apppath = apppath.rstrip(b'/') + repoprefix
                     dispatchparts = dispatchpath.strip(b'/').split(b'/')
                     dispatchpath = b'/'.join(dispatchparts)

                 elif b'PATH_INFO' in env:
                     if env[b'PATH_INFO'].strip(b'/'):
                         dispatchparts = env[b'PATH_INFO'].strip(b'/').split(b'/')
                         dispatchpath = b'/'.join(dispatchparts)
                     else:
                         dispatchparts = []
                         dispatchpath = b''
                 else:
                     dispatchparts = []
                     dispatchpath = None

                 querystring = env.get(b'QUERY_STRING', b'')

                 # Collect every (key, value) pair of the query string; the multidict
                 # keeps the values of a repeated key in the order they were added.
                 qsparams = multidict()
                 for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
                     qsparams.add(k, v)

                 # HTTP_* keys contain HTTP request headers. The Headers structure should
                 # perform case normalization for us. We just rewrite underscore to dash
                 # so keys match what likely went over the wire.
                 headers = []
                 for k, v in pycompat.iteritems(env):
                     if k.startswith(b'HTTP_'):
                         headers.append((k[len(b'HTTP_') :].replace(b'_', b'-'), v))

                 from . import wsgiheaders  # avoid cycle

                 headers = wsgiheaders.Headers(headers)

                 # This is kind of a lie because the HTTP header wasn't explicitly
                 # sent. But for all intents and purposes it should be OK to lie about
                 # this, since a consumer will use either value to determine how many
                 # bytes are available to read.
                 if b'CONTENT_LENGTH' in env and b'HTTP_CONTENT_LENGTH' not in env:
                     headers[b'Content-Length'] = env[b'CONTENT_LENGTH']

                 if b'CONTENT_TYPE' in env and b'HTTP_CONTENT_TYPE' not in env:
                     headers[b'Content-Type'] = env[b'CONTENT_TYPE']

                 if bodyfh is None:
                     bodyfh = env[b'wsgi.input']
                     # Cap reads at the declared length (an empty value counts as 0).
                     if b'Content-Length' in headers:
                         bodyfh = util.cappedreader(
                             bodyfh, int(headers[b'Content-Length'] or b'0')
                         )

                 return parsedrequest(
                     method=env[b'REQUEST_METHOD'],
                     url=fullurl,
                     baseurl=baseurl,
                     advertisedurl=advertisedfullurl,
                     advertisedbaseurl=advertisedbaseurl,
                     urlscheme=env[b'wsgi.url_scheme'],
                     remoteuser=env.get(b'REMOTE_USER'),
                     remotehost=env.get(b'REMOTE_HOST'),
                     apppath=apppath,
                     dispatchparts=dispatchparts,
                     dispatchpath=dispatchpath,
                     reponame=reponame,
                     querystring=querystring,
                     qsparams=qsparams,
                     headers=headers,
                     bodyfh=bodyfh,
                     rawenv=env,
                 )
             class offsettrackingwriter(object):
                 """Append-only file-like wrapper that counts what has been written.

                 Each instance is bound to a callable, which receives the data of
                 every ``write()`` call. The running total of written data is kept so
                 that ``tell()`` can be answered.

                 The intended use is wrapping the ``write()`` function handed back by a
                 WSGI ``start_response()`` call: that object is a bare callable and
                 offers none of the other file object methods.
                 """

                 def __init__(self, writefn):
                     self._offset = 0
                     self._write = writefn

                 def write(self, s):
                     """Pass ``s`` to the bound callable and advance the offset."""
                     written = self._write(s)
                     # A callable that reports nothing is taken to have written all
                     # of ``s``.
                     self._offset += len(s) if written is None else written

                 def flush(self):
                     """Do nothing; present only for file object compatibility."""
                     pass

                 def tell(self):
                     """Return the running total accumulated by ``write()``."""
                     return self._offset
             class wsgiresponse(object):
                 """Represents a response to a WSGI request.

                 A response consists of a status line, headers, and a body.

                 Consumers must populate the ``status`` and ``headers`` fields and
                 make a call to a ``setbody*()`` method before the response can be
                 issued.

                 When it is time to start sending the response over the wire,
                 ``sendresponse()`` is called. It handles emitting the header portion
                 of the response message. It then yields chunks of body data to be
                 written to the peer. Typically, the WSGI application itself calls
                 and returns the value from ``sendresponse()``.
                 """

                 def __init__(self, req, startresponse):
                     """Create an empty response tied to a specific request.

                     ``req`` is a ``parsedrequest``. ``startresponse`` is the
                     ``start_response`` function passed to the WSGI application.
                     """
                     self._req = req
                     self._startresponse = startresponse

                     self.status = None
                     from . import wsgiheaders  # avoid cycle

                     self.headers = wsgiheaders.Headers([])

                     # Exactly one of the next three is set by a ``setbody*()`` call.
                     self._bodybytes = None
                     self._bodygen = None
                     self._bodywillwrite = False
                     # Flipped once the ``sendresponse()`` generator starts running.
                     self._started = False
                     # ``write`` callable from ``start_response``; only kept when the
                     # body is produced through ``setbodywillwrite()``.
                     self._bodywritefn = None

                 def _verifybody(self):
                     """Raise ``error.ProgrammingError`` if a body was already defined."""
                     if (
                         self._bodybytes is not None
                         or self._bodygen is not None
                         or self._bodywillwrite
                     ):
                         raise error.ProgrammingError(b'cannot define body multiple times')

                 def setbodybytes(self, b):
                     """Define the response body as static bytes.

                     The empty string signals that there is no response body.

                     Also sets the ``Content-Length`` header to ``len(b)``.
                     """
                     self._verifybody()
                     self._bodybytes = b
                     self.headers[b'Content-Length'] = b'%d' % len(b)

                 def setbodygen(self, gen):
                     """Define the response body as a generator of bytes."""
                     self._verifybody()
                     self._bodygen = gen

                 def setbodywillwrite(self):
                     """Signal an intent to use write() to emit the response body.

                     **This is the least preferred way to send a body.**

                     It is preferred for WSGI applications to emit a generator of chunks
                     constituting the response body. However, some consumers can't emit
                     data this way. So, WSGI provides a way to obtain a ``write(data)``
                     function that can be used to synchronously perform an unbuffered
                     write.

                     Calling this function signals an intent to produce the body in this
                     manner.
                     """
                     self._verifybody()
                     self._bodywillwrite = True

                 def sendresponse(self):
                     """Send the generated response to the client.

                     Before this is called, ``status`` must be set and one of
                     ``setbodybytes()``, ``setbodygen()`` or ``setbodywillwrite()``
                     must be called.

                     This is a generator: nothing happens until it is advanced. It
                     yields the chunks of the response body (none at all for an empty
                     body or when the body is produced through ``write()``).

                     Calling this method multiple times is not allowed.

                     Raises ``error.ProgrammingError`` on any misuse described above
                     and on 304 responses carrying a body or a disallowed header.
                     """
                     if self._started:
                         raise error.ProgrammingError(
                             b'sendresponse() called multiple times'
                         )

                     self._started = True

                     if not self.status:
                         raise error.ProgrammingError(b'status line not defined')

                     if (
                         self._bodybytes is None
                         and self._bodygen is None
                         and not self._bodywillwrite
                     ):
                         raise error.ProgrammingError(b'response body not defined')

                     # RFC 7232 Section 4.1 states that a 304 MUST generate one of
                     # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
                     # and SHOULD NOT generate other headers unless they could be used
                     # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
                     # states that no response body can be issued. Content-Length can
                     # be sent. But if it is present, it should be the size of the response
                     # that wasn't transferred.
                     if self.status.startswith(b'304 '):
                         # setbodybytes('') will set C-L to 0. This doesn't conform with the
                         # spec. So remove it.
                         if self.headers.get(b'Content-Length') == b'0':
                             del self.headers[b'Content-Length']

                         # Strictly speaking, this is too strict. But until it causes
                         # problems, let's be strict.
                         badheaders = {
                             k
                             for k in self.headers.keys()
                             if k.lower()
                             not in (
                                 b'date',
                                 b'etag',
                                 b'expires',
                                 b'cache-control',
                                 b'content-location',
                                 b'content-security-policy',
                                 b'vary',
                             )
                         }
                         if badheaders:
                             raise error.ProgrammingError(
                                 b'illegal header on 304 response: %s'
                                 % b', '.join(sorted(badheaders))
                             )

                         if self._bodygen is not None or self._bodywillwrite:
                             raise error.ProgrammingError(
                                 b"must use setbodybytes('') with 304 responses"
                             )

                     # Various HTTP clients (notably httplib) won't read the HTTP response
                     # until the HTTP request has been sent in full. If servers (us) send a
                     # response before the HTTP request has been fully sent, the connection
                     # may deadlock because neither end is reading.
                     #
                     # We work around this by "draining" the request data before
                     # sending any response in some conditions.
                     drain = False
                     close = False

                     # If the client sent Expect: 100-continue, we assume it is smart enough
                     # to deal with the server sending a response before reading the request.
                     # (httplib doesn't do this.)
                     if self._req.headers.get(b'Expect', b'').lower() == b'100-continue':
                         pass
                     # Only tend to request methods that have bodies. Strictly speaking,
                     # we should sniff for a body. But this is fine for our existing
                     # WSGI applications.
                     elif self._req.method not in (b'POST', b'PUT'):
                         pass
                     else:
                         # If we don't know how much data to read, there's no guarantee
                         # that we can drain the request responsibly. The WSGI
                         # specification only says that servers *should* ensure the
                         # input stream doesn't overrun the actual request. So there's
                         # no guarantee that reading until EOF won't corrupt the stream
                         # state.
                         if not isinstance(self._req.bodyfh, util.cappedreader):
                             close = True
                         else:
                             # We /could/ only drain certain HTTP response codes. But 200 and
                             # non-200 wire protocol responses both require draining. Since
                             # we have a capped reader in place for all situations where we
                             # drain, it is safe to read from that stream. We'll either do
                             # a drain or no-op if we're already at EOF.
                             drain = True

                     if close:
                         self.headers[b'Connection'] = b'Close'

                     if drain:
                         assert isinstance(self._req.bodyfh, util.cappedreader)
                         while True:
                             chunk = self._req.bodyfh.read(32768)
                             if not chunk:
                                 break

                     strheaders = [
                         (pycompat.strurl(k), pycompat.strurl(v))
                         for k, v in self.headers.items()
                     ]
                     write = self._startresponse(pycompat.sysstr(self.status), strheaders)

                     # Dispatch on which body was *defined*, not on its truthiness: an
                     # empty bytes body or an empty (falsy) iterable is still a valid
                     # body and must not reach the error branch below.
                     if self._bodybytes is not None:
                         if self._bodybytes:
                             yield self._bodybytes
                     elif self._bodygen is not None:
                         for chunk in self._bodygen:
                             # PEP-3333 says that output must be bytes. And some WSGI
                             # implementations enforce this. We cast bytes-like types here
                             # for convenience.
                             if isinstance(chunk, bytearray):
                                 chunk = bytes(chunk)
                             yield chunk
                     elif self._bodywillwrite:
                         self._bodywritefn = write
                     else:
                         # Guarded against by the "response body not defined" check
                         # above; kept as a safety net.
                         raise error.ProgrammingError(b'do not know how to send body')

                 def getbodyfile(self):
                     """Obtain a file object like object representing the response body.

                     For this to work, you must call ``setbodywillwrite()`` and then
                     ``sendresponse()`` first. ``sendresponse()`` is a generator and the
                     function won't run to completion unless the generator is advanced. The
                     generator yields no items. The easiest way to consume it is with
                     ``list(res.sendresponse())``, which should resolve to an empty list -
                     ``[]``.

                     Returns an ``offsettrackingwriter`` wrapping the WSGI ``write``
                     callable. Raises ``error.ProgrammingError`` if either prerequisite
                     call is missing.
                     """
                     if not self._bodywillwrite:
                         raise error.ProgrammingError(b'must call setbodywillwrite() first')

                     if not self._started:
                         raise error.ProgrammingError(
                             b'must call sendresponse() first; did '
                             b'you remember to consume it since it '
                             b'is a generator?'
                         )

                     assert self._bodywritefn
                     return offsettrackingwriter(self._bodywritefn)
             def wsgiapplication(app_maker):
                 """Build a WSGI callable from an application factory.

                 Kept for compatibility with old CGI scripts; a plain hgweb() or
                 hgwebdir() can and should now be used as a WSGI application directly.

                 ``app_maker`` is called once, immediately. The returned function
                 forwards each ``(env, respond)`` pair to the object it produced.
                 """
                 wsgiapp = app_maker()

                 def run_wsgi(env, respond):
                     result = wsgiapp(env, respond)
                     return result

                 return run_wsgi