upstream/mercurial-mirror Commit - r36875:422be995

1

# hgweb/request.py - An http request from either CGI or the standalone server.

1

# hgweb/request.py - An http request from either CGI or the standalone server.

2

#

2

#

3

4

5

#

5

#

6

# This software may be used and distributed according to the terms of the

6

# This software may be used and distributed according to the terms of the

7

# GNU General Public License version 2 or any later version.

7

# GNU General Public License version 2 or any later version.

8

9

from __future__ import absolute_import

9

from __future__ import absolute_import

10

11

import errno

11

import errno

12

import socket

12

import socket

13

import wsgiref.headers as wsgiheaders

13

import wsgiref.headers as wsgiheaders

14

#import wsgiref.validate

14

#import wsgiref.validate

15

16

from .common import (

16

from .common import (

17

ErrorResponse,

17

ErrorResponse,

18

HTTP_NOT_MODIFIED,

18

HTTP_NOT_MODIFIED,

19

statusmessage,

19

statusmessage,

20

)

20

)

21

22

from ..thirdparty import (

22

from ..thirdparty import (

23

attr,

23

attr,

24

)

24

)

25

from .. import (

25

from .. import (

26

pycompat,

26

pycompat,

27

util,

27

util,

28

)

28

)

29

30

# Short query-string aliases. Each key maps to a list of (name, value) pairs
# to set in the form when the alias is present. A value of ``None`` means
# "reuse whatever value was supplied for the alias itself".
shortcuts = {
    'cl': [('cmd', ['changelog']), ('rev', None)],
    'sl': [('cmd', ['shortlog']), ('rev', None)],
    'cs': [('cmd', ['changeset']), ('node', None)],
    'f': [('cmd', ['file']), ('filenode', None)],
    'fl': [('cmd', ['filelog']), ('filenode', None)],
    'fd': [('cmd', ['filediff']), ('node', None)],
    'fa': [('cmd', ['annotate']), ('filenode', None)],
    'mf': [('cmd', ['manifest']), ('manifest', None)],
    'ca': [('cmd', ['archive']), ('node', None)],
    'tags': [('cmd', ['tags'])],
    'tip': [('cmd', ['changeset']), ('node', ['tip'])],
    'static': [('cmd', ['static']), ('file', None)]
}

44

45

def normalize(form):
    """Expand shortcut keys in ``form`` and return a bytes-normalized copy.

    ``form`` maps parameter names to lists of values. It is modified in
    place: for every key of ``shortcuts`` found in ``form``, the expanded
    (name, value) pairs are stored and the shortcut key itself is removed.

    Returns a new dict whose keys, and whose whitespace-stripped values, have
    each been passed through ``pycompat.bytesurl``.
    """
    # first expand the shortcuts
    for k in shortcuts:
        if k in form:
            for name, value in shortcuts[k]:
                # ``None`` means "take the value given for the shortcut".
                if value is None:
                    value = form[k]
                form[name] = value
            del form[k]
    # And strip the values
    bytesform = {}
    for k, v in form.items():
        bytesform[pycompat.bytesurl(k)] = [
            pycompat.bytesurl(i.strip()) for i in v]
    return bytesform

60

61

@attr.s(frozen=True)
class parsedrequest(object):
    """Represents a parsed WSGI request.

    Contains both parsed parameters as well as a handle on the input stream.
    The class is declared with ``attr.s(frozen=True)``; every field below is
    a plain ``attr.ib()`` with no default, so all of them must be supplied
    when constructing an instance.
    """

    # Request method.
    method = attr.ib()
    # Full URL for this request.
    url = attr.ib()
    # URL without any path components. Just <proto>://<host><port>.
    baseurl = attr.ib()
    # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
    # of HTTP: Host header for hostname. This is likely what clients used.
    advertisedurl = attr.ib()
    advertisedbaseurl = attr.ib()
    # WSGI application path.
    apppath = attr.ib()
    # List of path parts to be used for dispatch.
    dispatchparts = attr.ib()
    # URL path component (no query string) used for dispatch.
    dispatchpath = attr.ib()
    # Whether there is a path component to this request. This can be true
    # when ``dispatchpath`` is empty due to REPO_NAME muckery.
    havepathinfo = attr.ib()
    # Raw query string (part after "?" in URL).
    querystring = attr.ib()
    # List of 2-tuples of query string arguments.
    querystringlist = attr.ib()
    # Dict of query string arguments. Values are lists with at least 1 item.
    querystringdict = attr.ib()
    # wsgiref.headers.Headers instance. Operates like a dict with case
    # insensitive keys.
    headers = attr.ib()
    # Request body input stream.
    bodyfh = attr.ib()

98

67

99

def parserequestfromenv(env, bodyfh):
    """Parse URL components from environment variables.

    WSGI defines request attributes via environment variables. This function
    parses the environment variables into a data structure.

    ``env`` is the WSGI environment dict. ``bodyfh`` is stored unchanged on
    the result as the request body stream.

    Returns a ``parsedrequest`` instance.

    Raises ``KeyError`` if ``env`` lacks ``wsgi.url_scheme``, ``SERVER_NAME``,
    ``SERVER_PORT`` or ``REQUEST_METHOD``. ``SCRIPT_NAME``, ``PATH_INFO``,
    ``QUERY_STRING`` and ``REPO_NAME`` are optional.
    """
    # PEP-0333 defines the WSGI spec and is a useful reference for this code.

    # We first validate that the incoming object conforms with the WSGI spec.
    # We only want to be dealing with spec-conforming WSGI implementations.
    # TODO enable this once we fix internal violations.
    #wsgiref.validate.check_environ(env)

    # PEP-0333 states that environment keys and values are native strings
    # (bytes on Python 2 and str on Python 3). The code points for the Unicode
    # strings on Python 3 must be between \00000-\000FF. We deal with bytes
    # in Mercurial, so mass convert string keys and values to bytes.
    if pycompat.ispy3:
        env = {k.encode('latin-1'): v for k, v in env.items()}
        env = {k: v.encode('latin-1') if isinstance(v, str) else v
               for k, v in env.items()}

    # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
    # the environment variables.
    # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
    # how URLs are reconstructed.
    fullurl = env['wsgi.url_scheme'] + '://'
    advertisedfullurl = fullurl

    def addport(s):
        # Append ":<port>" unless the port is the scheme's default.
        if env['wsgi.url_scheme'] == 'https':
            if env['SERVER_PORT'] != '443':
                s += ':' + env['SERVER_PORT']
        else:
            if env['SERVER_PORT'] != '80':
                s += ':' + env['SERVER_PORT']

        return s

    # A Host header is used verbatim (no port is appended to it); otherwise
    # fall back to SERVER_NAME plus the port.
    if env.get('HTTP_HOST'):
        fullurl += env['HTTP_HOST']
    else:
        fullurl += env['SERVER_NAME']
        fullurl = addport(fullurl)

    advertisedfullurl += env['SERVER_NAME']
    advertisedfullurl = addport(advertisedfullurl)

    baseurl = fullurl
    advertisedbaseurl = advertisedfullurl

    fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
    advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
    fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
    advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))

    if env.get('QUERY_STRING'):
        fullurl += '?' + env['QUERY_STRING']
        advertisedfullurl += '?' + env['QUERY_STRING']

    # When dispatching requests, we look at the URL components (PATH_INFO
    # and QUERY_STRING) after the application root (SCRIPT_NAME). But hgwebdir
    # has the concept of "virtual" repositories. This is defined via REPO_NAME.
    # If REPO_NAME is defined, we append it to SCRIPT_NAME to form a new app
    # root. We also exclude its path components from PATH_INFO when resolving
    # the dispatch path.

    # SCRIPT_NAME may be omitted by the server when it is empty, so treat a
    # missing key like the empty string (as the URL reconstruction above
    # already does).
    apppath = env.get('SCRIPT_NAME', '')

    if env.get('REPO_NAME'):
        if not apppath.endswith('/'):
            apppath += '/'

        apppath += env.get('REPO_NAME')

    if 'PATH_INFO' in env:
        dispatchparts = env['PATH_INFO'].strip('/').split('/')

        # Strip out repo parts.
        repoparts = env.get('REPO_NAME', '').split('/')
        if dispatchparts[:len(repoparts)] == repoparts:
            dispatchparts = dispatchparts[len(repoparts):]
    else:
        dispatchparts = []

    dispatchpath = '/'.join(dispatchparts)

    querystring = env.get('QUERY_STRING', '')

    # We store as a list so we have ordering information. We also store as
    # a dict to facilitate fast lookup.
    querystringlist = util.urlreq.parseqsl(querystring, keep_blank_values=True)

    querystringdict = {}
    for k, v in querystringlist:
        if k in querystringdict:
            querystringdict[k].append(v)
        else:
            querystringdict[k] = [v]

    # HTTP_* keys contain HTTP request headers. The Headers structure should
    # perform case normalization for us. We just rewrite underscore to dash
    # so keys match what likely went over the wire.
    headers = []
    for k, v in env.items():
        if k.startswith('HTTP_'):
            headers.append((k[len('HTTP_'):].replace('_', '-'), v))

    headers = wsgiheaders.Headers(headers)

    # This is kind of a lie because the HTTP header wasn't explicitly
    # sent. But for all intents and purposes it should be OK to lie about
    # this, since a consumer will use either value to determine how many
    # bytes are available to read.
    if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
        headers['Content-Length'] = env['CONTENT_LENGTH']

    # TODO do this once we remove wsgirequest.inp, otherwise we could have
    # multiple readers from the underlying input stream.
    #bodyfh = env['wsgi.input']
    #if 'Content-Length' in headers:
    #    bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))

    return parsedrequest(method=env['REQUEST_METHOD'],
                         url=fullurl, baseurl=baseurl,
                         advertisedurl=advertisedfullurl,
                         advertisedbaseurl=advertisedbaseurl,
                         apppath=apppath,
                         dispatchparts=dispatchparts, dispatchpath=dispatchpath,
                         havepathinfo='PATH_INFO' in env,
                         querystring=querystring,
                         querystringlist=querystringlist,
                         querystringdict=querystringdict,
                         headers=headers,
                         bodyfh=bodyfh)

234

203

235

class wsgirequest(object):
    """Higher-level API for a WSGI request.

    WSGI applications are invoked with 2 arguments. They are used to
    instantiate instances of this class, which provides higher-level APIs
    for obtaining request parameters, writing HTTP output, etc.
    """
    def __init__(self, wsgienv, start_response):
        """Wrap a WSGI environment and ``start_response`` callable.

        Raises ``RuntimeError`` unless ``wsgi.version`` is a 1.x version.
        """
        version = wsgienv[r'wsgi.version']
        if (version < (1, 0)) or (version >= (2, 0)):
            raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
                               % version)

        inp = wsgienv[r'wsgi.input']

        # Cap the input stream at the declared length when one is known, so
        # reads cannot run past the end of this request's body.
        if r'HTTP_CONTENT_LENGTH' in wsgienv:
            inp = util.cappedreader(inp, int(wsgienv[r'HTTP_CONTENT_LENGTH']))
        elif r'CONTENT_LENGTH' in wsgienv:
            inp = util.cappedreader(inp, int(wsgienv[r'CONTENT_LENGTH']))

        self.err = wsgienv[r'wsgi.errors']
        self.threaded = wsgienv[r'wsgi.multithread']
        self.multiprocess = wsgienv[r'wsgi.multiprocess']
        self.run_once = wsgienv[r'wsgi.run_once']
        self.env = wsgienv
        self.req = parserequestfromenv(wsgienv, inp)
        self.form = self.req.querystringdict
        self._start_response = start_response
        # Set by respond() once the response has been started.
        self.server_write = None
        # Response headers queued until respond() starts the response.
        self.headers = []

    def respond(self, status, type, filename=None, body=None):
        """Start the HTTP response (once) and optionally write ``body``.

        ``status`` may be an ``ErrorResponse``, an integer status code, or
        any other value, which is passed through to ``pycompat.sysstr``
        unchanged. ``type`` is the Content-Type value. If ``filename`` is
        given, a Content-Disposition header is emitted using the part after
        the last ``/``, with backslashes and double quotes escaped. If
        ``body`` is given, a Content-Length header is emitted, the body is
        written, and ``server_write`` is reset to ``None``.

        The response is only started on the first call; later calls skip
        header handling and just write ``body``.

        Raises ``TypeError`` if a queued header value is not a ``str``.
        """
        if not isinstance(type, str):
            type = pycompat.sysstr(type)
        if self._start_response is not None:
            self.headers.append((r'Content-Type', type))
            if filename:
                filename = (filename.rpartition('/')[-1]
                            .replace('\\', '\\\\').replace('"', '\\"'))
                self.headers.append(('Content-Disposition',
                                     'inline; filename="%s"' % filename))
            if body is not None:
                self.headers.append((r'Content-Length', str(len(body))))

            # Note: this check runs before any ErrorResponse headers are
            # merged in below.
            for k, v in self.headers:
                if not isinstance(v, str):
                    raise TypeError('header value must be string: %r' % (v,))

            if isinstance(status, ErrorResponse):
                self.headers.extend(status.headers)
                if status.code == HTTP_NOT_MODIFIED:
                    # RFC 2616 Section 10.3.5: 304 Not Modified has cases where
                    # it MUST NOT include any headers other than these and no
                    # body
                    self.headers = [(k, v) for (k, v) in self.headers if
                                    k in ('Date', 'ETag', 'Expires',
                                          'Cache-Control', 'Vary')]
                status = statusmessage(status.code, pycompat.bytestr(status))
            elif status == 200:
                status = '200 Script output follows'
            elif isinstance(status, int):
                status = statusmessage(status)

            # Various HTTP clients (notably httplib) won't read the HTTP
            # response until the HTTP request has been sent in full. If servers
            # (us) send a response before the HTTP request has been fully sent,
            # the connection may deadlock because neither end is reading.
            #
            # We work around this by "draining" the request data before
            # sending any response in some conditions.
            drain = False
            close = False

            # If the client sent Expect: 100-continue, we assume it is smart
            # enough to deal with the server sending a response before reading
            # the request. (httplib doesn't do this.)
            if self.env.get(r'HTTP_EXPECT', r'').lower() == r'100-continue':
                pass
            # Only tend to request methods that have bodies. Strictly speaking,
            # we should sniff for a body. But this is fine for our existing
            # WSGI applications.
            elif self.env[r'REQUEST_METHOD'] not in (r'POST', r'PUT'):
                pass
            else:
                # If we don't know how much data to read, there's no guarantee
                # that we can drain the request responsibly. The WSGI
                # specification only says that servers *should* ensure the
                # input stream doesn't overrun the actual request. So there's
                # no guarantee that reading until EOF won't corrupt the stream
                # state.
                if not isinstance(self.req.bodyfh, util.cappedreader):
                    close = True
                else:
                    # We /could/ only drain certain HTTP response codes. But 200
                    # and non-200 wire protocol responses both require draining.
                    # Since we have a capped reader in place for all situations
                    # where we drain, it is safe to read from that stream. We'll
                    # either do a drain or no-op if we're already at EOF.
                    drain = True

            if close:
                self.headers.append((r'Connection', r'Close'))

            if drain:
                assert isinstance(self.req.bodyfh, util.cappedreader)
                while True:
                    chunk = self.req.bodyfh.read(32768)
                    if not chunk:
                        break

            self.server_write = self._start_response(
                pycompat.sysstr(status), self.headers)
            self._start_response = None
            self.headers = []
        if body is not None:
            self.write(body)
            self.server_write = None

    def write(self, thing):
        """Send ``thing`` through ``server_write``; falsy values are skipped.

        A ``socket.error`` whose errno is ``ECONNRESET`` is swallowed; any
        other ``socket.error`` is re-raised.
        """
        if thing:
            try:
                self.server_write(thing)
            except socket.error as inst:
                # Use the ``errno`` attribute rather than indexing: exception
                # instances are not subscriptable on Python 3.
                if inst.errno != errno.ECONNRESET:
                    raise

    def flush(self):
        """No-op; always returns ``None``."""
        return None

363

332

364

def wsgiapplication(app_maker):
    """Wrap the application built by ``app_maker`` in a WSGI callable.

    For compatibility with old CGI scripts. A plain hgweb() or hgwebdir()
    can and should now be used as a WSGI application.

    ``app_maker`` is called once, immediately, with no arguments. The
    returned function forwards ``(env, respond)`` to the resulting
    application and returns whatever it returns.
    """
    application = app_maker()

    def run_wsgi(env, respond):
        return application(env, respond)

    return run_wsgi

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # hgweb/request.py - An http request from either CGI or the standalone server.
             #
             # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
             # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             from __future__ import absolute_import
             import errno
             import socket
             import wsgiref.headers as wsgiheaders
             #import wsgiref.validate
             from .common import (
                 ErrorResponse,
                 HTTP_NOT_MODIFIED,
                 statusmessage,
             )
             from ..thirdparty import (
                 attr,
             )
             from .. import (
                 pycompat,
                 util,
             )
-            shortcuts = {
-                'cl': [('cmd', ['changelog']), ('rev', None)],
-                'sl': [('cmd', ['shortlog']), ('rev', None)],
-                'cs': [('cmd', ['changeset']), ('node', None)],
-                'f': [('cmd', ['file']), ('filenode', None)],
-                'fl': [('cmd', ['filelog']), ('filenode', None)],
-                'fd': [('cmd', ['filediff']), ('node', None)],
-                'fa': [('cmd', ['annotate']), ('filenode', None)],
-                'mf': [('cmd', ['manifest']), ('manifest', None)],
-                'ca': [('cmd', ['archive']), ('node', None)],
-                'tags': [('cmd', ['tags'])],
-                'tip': [('cmd', ['changeset']), ('node', ['tip'])],
-                'static': [('cmd', ['static']), ('file', None)]
-            def normalize(form):
-                # first expand the shortcuts
-                for k in shortcuts:
-                    if k in form:
-                        for name, value in shortcuts[k]:
-                            if value is None:
-                                value = form[k]
-                            form[name] = value
-                        del form[k]
-                # And strip the values
-                bytesform = {}
-                for k, v in form.iteritems():
-                    bytesform[pycompat.bytesurl(k)] = [
-                        pycompat.bytesurl(i.strip()) for i in v]
-                return bytesform
             @attr.s(frozen=True)
             class parsedrequest(object):
                 """Represents a parsed WSGI request.

                 Contains both parsed parameters as well as a handle on the input stream.
                 Instances are frozen (attributes cannot be reassigned after
                 construction) and are built by ``parserequestfromenv()``.
                 """
                 # Request method (the ``REQUEST_METHOD`` environment value).
                 method = attr.ib()
                 # Full URL for this request, including path and query string.
                 url = attr.ib()
                 # URL without any path components. Just <proto>://<host><port>.
                 baseurl = attr.ib()
                 # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
                 # of the HTTP ``Host`` header for the hostname. This is likely what
                 # clients used.
                 advertisedurl = attr.ib()
                 advertisedbaseurl = attr.ib()
                 # WSGI application path: SCRIPT_NAME, with REPO_NAME appended when set.
                 apppath = attr.ib()
                 # List of path parts to be used for dispatch (REPO_NAME parts removed).
                 dispatchparts = attr.ib()
                 # URL path component (no query string) used for dispatch. This is
                 # ``dispatchparts`` joined with "/".
                 dispatchpath = attr.ib()
                 # Whether there is a path component to this request. This can be true
                 # when ``dispatchpath`` is empty due to REPO_NAME muckery.
                 havepathinfo = attr.ib()
                 # Raw query string (part after "?" in URL).
                 querystring = attr.ib()
                 # List of 2-tuples of query string arguments, in request order.
                 querystringlist = attr.ib()
                 # Dict of query string arguments. Values are lists with at least 1 item.
                 querystringdict = attr.ib()
                 # wsgiref.headers.Headers instance. Operates like a dict with case
                 # insensitive keys.
                 headers = attr.ib()
                 # Request body input stream.
                 bodyfh = attr.ib()
             def parserequestfromenv(env, bodyfh):
                 """Parse URL components from environment variables.

                 WSGI defines request attributes via environment variables. This function
                 parses the environment variables into a data structure.

                 ``env`` is the WSGI environment dict. ``wsgi.url_scheme``,
                 ``SERVER_NAME``, ``SERVER_PORT`` and ``REQUEST_METHOD`` must be present
                 (a missing one raises ``KeyError``); ``HTTP_HOST``, ``SCRIPT_NAME``,
                 ``PATH_INFO``, ``QUERY_STRING``, ``REPO_NAME`` and ``CONTENT_LENGTH``
                 are optional.

                 ``bodyfh`` is the request body stream. It is stored on the result as-is;
                 this function never reads from it.

                 Returns a ``parsedrequest`` instance.
                 """
                 # PEP-0333 defines the WSGI spec and is a useful reference for this code.
                 # We first validate that the incoming object conforms with the WSGI spec.
                 # We only want to be dealing with spec-conforming WSGI implementations.
                 # TODO enable this once we fix internal violations.
                 #wsgiref.validate.check_environ(env)
                 # PEP-0333 states that environment keys and values are native strings
                 # (bytes on Python 2 and str on Python 3). The code points for the Unicode
                 # strings on Python 3 must be between \00000-\000FF. We deal with bytes
                 # in Mercurial, so mass convert string keys and values to bytes.
                 if pycompat.ispy3:
                     env = {k.encode('latin-1'): v for k, v in env.iteritems()}
                     env = {k: v.encode('latin-1') if isinstance(v, str) else v
                            for k, v in env.iteritems()}
                 # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
                 # the environment variables.
                 # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
                 # how URLs are reconstructed.
                 fullurl = env['wsgi.url_scheme'] + '://'
                 advertisedfullurl = fullurl
                 def addport(s):
                     # Append ":<port>" unless it is the default port for the scheme.
                     if env['wsgi.url_scheme'] == 'https':
                         if env['SERVER_PORT'] != '443':
                             s += ':' + env['SERVER_PORT']
                     else:
                         if env['SERVER_PORT'] != '80':
                             s += ':' + env['SERVER_PORT']
                     return s
                 # The Host header (when sent) already carries any non-default port,
                 # so the port is only appended when falling back to SERVER_NAME.
                 if env.get('HTTP_HOST'):
                     fullurl += env['HTTP_HOST']
                 else:
                     fullurl += env['SERVER_NAME']
                     fullurl = addport(fullurl)
                 advertisedfullurl += env['SERVER_NAME']
                 advertisedfullurl = addport(advertisedfullurl)
                 baseurl = fullurl
                 advertisedbaseurl = advertisedfullurl
                 fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
                 advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
                 fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
                 advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
                 if env.get('QUERY_STRING'):
                     fullurl += '?' + env['QUERY_STRING']
                     advertisedfullurl += '?' + env['QUERY_STRING']
                 # When dispatching requests, we look at the URL components (PATH_INFO
                 # and QUERY_STRING) after the application root (SCRIPT_NAME). But hgwebdir
                 # has the concept of "virtual" repositories. This is defined via REPO_NAME.
                 # If REPO_NAME is defined, we append it to SCRIPT_NAME to form a new app
                 # root. We also exclude its path components from PATH_INFO when resolving
                 # the dispatch path.
                 #
                 # The WSGI spec allows servers to omit SCRIPT_NAME when it would be
                 # empty, so treat a missing key as '' (as the URL reconstruction above
                 # already does) instead of raising KeyError.
                 apppath = env.get('SCRIPT_NAME', '')
                 if env.get('REPO_NAME'):
                     if not apppath.endswith('/'):
                         apppath += '/'
                     apppath += env.get('REPO_NAME')
                 if 'PATH_INFO' in env:
                     dispatchparts = env['PATH_INFO'].strip('/').split('/')
                     # Strip out repo parts. Without REPO_NAME, ``repoparts`` is
                     # [''], which also turns an empty PATH_INFO into [].
                     repoparts = env.get('REPO_NAME', '').split('/')
                     if dispatchparts[:len(repoparts)] == repoparts:
                         dispatchparts = dispatchparts[len(repoparts):]
                 else:
                     dispatchparts = []
                 dispatchpath = '/'.join(dispatchparts)
                 querystring = env.get('QUERY_STRING', '')
                 # We store as a list so we have ordering information. We also store as
                 # a dict to facilitate fast lookup.
                 querystringlist = util.urlreq.parseqsl(querystring, keep_blank_values=True)
                 querystringdict = {}
                 for k, v in querystringlist:
                     querystringdict.setdefault(k, []).append(v)
                 # HTTP_* keys contain HTTP request headers. The Headers structure should
                 # perform case normalization for us. We just rewrite underscore to dash
                 # so keys match what likely went over the wire.
                 headers = []
                 for k, v in env.iteritems():
                     if k.startswith('HTTP_'):
                         headers.append((k[len('HTTP_'):].replace('_', '-'), v))
                 headers = wsgiheaders.Headers(headers)
                 # This is kind of a lie because the HTTP header wasn't explicitly
                 # sent. But for all intents and purposes it should be OK to lie about
                 # this, since a consumer will use either value to determine how many
                 # bytes are available to read.
                 if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
                     headers['Content-Length'] = env['CONTENT_LENGTH']
                 # TODO do this once we remove wsgirequest.inp, otherwise we could have
                 # multiple readers from the underlying input stream.
                 #bodyfh = env['wsgi.input']
                 #if 'Content-Length' in headers:
                 #    bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))
                 return parsedrequest(method=env['REQUEST_METHOD'],
                                      url=fullurl, baseurl=baseurl,
                                      advertisedurl=advertisedfullurl,
                                      advertisedbaseurl=advertisedbaseurl,
                                      apppath=apppath,
                                      dispatchparts=dispatchparts, dispatchpath=dispatchpath,
                                      havepathinfo='PATH_INFO' in env,
                                      querystring=querystring,
                                      querystringlist=querystringlist,
                                      querystringdict=querystringdict,
                                      headers=headers,
                                      bodyfh=bodyfh)
             class wsgirequest(object):
                 """Higher-level API for a WSGI request.

                 WSGI applications are invoked with 2 arguments. They are used to
                 instantiate instances of this class, which provides higher-level APIs
                 for obtaining request parameters, writing HTTP output, etc.
                 """
                 def __init__(self, wsgienv, start_response):
                     """Wrap a WSGI environment and its ``start_response`` callable.

                     Raises ``RuntimeError`` unless ``wsgi.version`` is 1.x.
                     """
                     version = wsgienv[r'wsgi.version']
                     if (version < (1, 0)) or (version >= (2, 0)):
                         raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
                                            % version)
                     # Cap the input stream at the declared body length (when one is
                     # declared) so reads cannot run past the end of this request.
                     inp = wsgienv[r'wsgi.input']
                     if r'HTTP_CONTENT_LENGTH' in wsgienv:
                         inp = util.cappedreader(inp, int(wsgienv[r'HTTP_CONTENT_LENGTH']))
                     elif r'CONTENT_LENGTH' in wsgienv:
                         inp = util.cappedreader(inp, int(wsgienv[r'CONTENT_LENGTH']))
                     self.err = wsgienv[r'wsgi.errors']
                     self.threaded = wsgienv[r'wsgi.multithread']
                     self.multiprocess = wsgienv[r'wsgi.multiprocess']
                     self.run_once = wsgienv[r'wsgi.run_once']
                     self.env = wsgienv
                     self.req = parserequestfromenv(wsgienv, inp)
                     # The parsed query string dict is exposed directly as the form.
                     self.form = self.req.querystringdict
                     self._start_response = start_response
                     # Set by respond() to the write callable returned by start_response.
                     self.server_write = None
                     # Response headers queued until respond() sends them.
                     self.headers = []
                 def respond(self, status, type, filename=None, body=None):
                     """Send the status line and headers, then optionally ``body``.

                     ``status`` may be an ``ErrorResponse``, an int status code, or an
                     already formatted status string. ``type`` is the Content-Type
                     value. If ``filename`` is given, a Content-Disposition header is
                     emitted using its last path component. If ``body`` is not None, a
                     Content-Length header is added, the body is written, and
                     ``server_write`` is reset to None.

                     Status and headers are only sent on the first call; afterwards
                     ``_start_response`` is None and only ``body`` is written.

                     Raises ``TypeError`` if a queued header value is not a ``str``.
                     """
                     if not isinstance(type, str):
                         type = pycompat.sysstr(type)
                     if self._start_response is not None:
                         self.headers.append((r'Content-Type', type))
                         if filename:
                             # Keep only the basename and escape characters that are
                             # special inside a quoted header string.
                             filename = (filename.rpartition('/')[-1]
                                         .replace('\\', '\\\\').replace('"', '\\"'))
                             self.headers.append(('Content-Disposition',
                                                  'inline; filename="%s"' % filename))
                         if body is not None:
                             self.headers.append((r'Content-Length', str(len(body))))
                         for k, v in self.headers:
                             if not isinstance(v, str):
                                 raise TypeError('header value must be string: %r' % (v,))
                         if isinstance(status, ErrorResponse):
                             self.headers.extend(status.headers)
                             if status.code == HTTP_NOT_MODIFIED:
                                 # RFC 2616 Section 10.3.5: 304 Not Modified has cases where
                                 # it MUST NOT include any headers other than these and no
                                 # body
                                 self.headers = [(k, v) for (k, v) in self.headers if
                                                 k in ('Date', 'ETag', 'Expires',
                                                       'Cache-Control', 'Vary')]
                             status = statusmessage(status.code, pycompat.bytestr(status))
                         elif status == 200:
                             status = '200 Script output follows'
                         elif isinstance(status, int):
                             status = statusmessage(status)
                         # Various HTTP clients (notably httplib) won't read the HTTP
                         # response until the HTTP request has been sent in full. If servers
                         # (us) send a response before the HTTP request has been fully sent,
                         # the connection may deadlock because neither end is reading.
                         #
                         # We work around this by "draining" the request data before
                         # sending any response in some conditions.
                         drain = False
                         close = False
                         # If the client sent Expect: 100-continue, we assume it is smart
                         # enough to deal with the server sending a response before reading
                         # the request. (httplib doesn't do this.)
                         if self.env.get(r'HTTP_EXPECT', r'').lower() == r'100-continue':
                             pass
                         # Only tend to request methods that have bodies. Strictly speaking,
                         # we should sniff for a body. But this is fine for our existing
                         # WSGI applications.
                         elif self.env[r'REQUEST_METHOD'] not in (r'POST', r'PUT'):
                             pass
                         else:
                             # If we don't know how much data to read, there's no guarantee
                             # that we can drain the request responsibly. The WSGI
                             # specification only says that servers *should* ensure the
                             # input stream doesn't overrun the actual request. So there's
                             # no guarantee that reading until EOF won't corrupt the stream
                             # state.
                             if not isinstance(self.req.bodyfh, util.cappedreader):
                                 close = True
                             else:
                                 # We /could/ only drain certain HTTP response codes. But 200
                                 # and non-200 wire protocol responses both require draining.
                                 # Since we have a capped reader in place for all situations
                                 # where we drain, it is safe to read from that stream. We'll
                                 # either do a drain or no-op if we're already at EOF.
                                 drain = True
                         if close:
                             self.headers.append((r'Connection', r'Close'))
                         if drain:
                             assert isinstance(self.req.bodyfh, util.cappedreader)
                             while True:
                                 chunk = self.req.bodyfh.read(32768)
                                 if not chunk:
                                     break
                         self.server_write = self._start_response(
                             pycompat.sysstr(status), self.headers)
                         self._start_response = None
                         self.headers = []
                     if body is not None:
                         self.write(body)
                         self.server_write = None
                 def write(self, thing):
                     """Write ``thing`` to the client; empty values are skipped.

                     A connection reset by the peer is ignored. Any other
                     ``socket.error`` propagates to the caller.
                     """
                     if thing:
                         try:
                             self.server_write(thing)
                         except socket.error as inst:
                             # Use the ``errno`` attribute rather than ``inst[0]``:
                             # exception objects cannot be indexed on Python 3.
                             if inst.errno != errno.ECONNRESET:
                                 raise
                 def flush(self):
                     """No-op; always returns None."""
                     return None
             def wsgiapplication(app_maker):
                 """Build a WSGI callable from an application factory.

                 Kept for compatibility with old CGI scripts; a plain hgweb() or
                 hgwebdir() can and should now be used as a WSGI application.

                 ``app_maker`` is called exactly once, immediately. The returned
                 function forwards ``(env, respond)`` to the application it produced.
                 """
                 wsgiapp = app_maker()
                 def run_wsgi(env, respond):
                     result = wsgiapp(env, respond)
                     return result
                 return run_wsgi

             #require serve
             Test raw style of hgweb
               $ hg init test
               $ cd test
               $ mkdir sub
               $ cat >'sub/some text%.txt' <<ENDSOME
               > This is just some random text
               > that will go inside the file and take a few lines.
               > It is very boring to read, but computers don't
               > care about things like that.
               > ENDSOME
               $ hg add 'sub/some text%.txt'
               $ hg commit -d "1 0" -m "Just some text"
               $ hg serve -p $HGPORT -A access.log -E error.log -d --pid-file=hg.pid
               $ cat hg.pid >> $DAEMON_PIDS
-              $ (get-with-headers.py localhost:$HGPORT '?f=bf0ff59095c9;file=sub/some%20text%25.txt;style=raw' content-type content-length content-disposition) >getoutput.txt
+              $ (get-with-headers.py localhost:$HGPORT 'raw-file/bf0ff59095c9/sub/some%20text%25.txt' content-type content-length content-disposition) >getoutput.txt
               $ killdaemons.py hg.pid
               $ cat getoutput.txt
 Script output follows
               content-type: application/binary
               content-length: 157
               content-disposition: inline; filename="some text%.txt"
               This is just some random text
               that will go inside the file and take a few lines.
               It is very boring to read, but computers don't
               care about things like that.
               $ cat access.log error.log
-              $LOCALIP - - [*] "GET /?f=bf0ff59095c9;file=sub/some%20text%25.txt;style=raw HTTP/1.1" 200 - (glob)
+              $LOCALIP - - [$LOGDATE$] "GET /raw-file/bf0ff59095c9/sub/some%20text%25.txt HTTP/1.1" 200 - (glob)
               $ rm access.log error.log
               $ hg serve -p $HGPORT -A access.log -E error.log -d --pid-file=hg.pid \
               > --config web.guessmime=True
               $ cat hg.pid >> $DAEMON_PIDS
-              $ (get-with-headers.py localhost:$HGPORT '?f=bf0ff59095c9;file=sub/some%20text%25.txt;style=raw' content-type content-length content-disposition) >getoutput.txt
+              $ (get-with-headers.py localhost:$HGPORT 'raw-file/bf0ff59095c9/sub/some%20text%25.txt' content-type content-length content-disposition) >getoutput.txt
               $ killdaemons.py hg.pid
               $ cat getoutput.txt
 Script output follows
               content-type: text/plain; charset="ascii"
               content-length: 157
               content-disposition: inline; filename="some text%.txt"
               This is just some random text
               that will go inside the file and take a few lines.
               It is very boring to read, but computers don't
               care about things like that.
               $ cat access.log error.log
-              $LOCALIP - - [*] "GET /?f=bf0ff59095c9;file=sub/some%20text%25.txt;style=raw HTTP/1.1" 200 - (glob)
+              $LOCALIP - - [$LOGDATE$] "GET /raw-file/bf0ff59095c9/sub/some%20text%25.txt HTTP/1.1" 200 - (glob)
               $ cd ..