##// END OF EJS Templates
hgweb: expose repo name on parsedrequest...
Gregory Szorc -
r36884:8ddb5c35 default
parent child Browse files
Show More
@@ -1,442 +1,443 b''
1 # hgweb/hgweb_mod.py - Web interface for a repository.
1 # hgweb/hgweb_mod.py - Web interface for a repository.
2 #
2 #
3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import contextlib
11 import contextlib
12 import os
12 import os
13
13
14 from .common import (
14 from .common import (
15 ErrorResponse,
15 ErrorResponse,
16 HTTP_BAD_REQUEST,
16 HTTP_BAD_REQUEST,
17 HTTP_NOT_FOUND,
17 HTTP_NOT_FOUND,
18 HTTP_NOT_MODIFIED,
18 HTTP_NOT_MODIFIED,
19 HTTP_OK,
19 HTTP_OK,
20 HTTP_SERVER_ERROR,
20 HTTP_SERVER_ERROR,
21 caching,
21 caching,
22 cspvalues,
22 cspvalues,
23 permhooks,
23 permhooks,
24 )
24 )
25
25
26 from .. import (
26 from .. import (
27 encoding,
27 encoding,
28 error,
28 error,
29 formatter,
29 formatter,
30 hg,
30 hg,
31 hook,
31 hook,
32 profiling,
32 profiling,
33 pycompat,
33 pycompat,
34 repoview,
34 repoview,
35 templatefilters,
35 templatefilters,
36 templater,
36 templater,
37 ui as uimod,
37 ui as uimod,
38 util,
38 util,
39 wireprotoserver,
39 wireprotoserver,
40 )
40 )
41
41
42 from . import (
42 from . import (
43 request as requestmod,
43 request as requestmod,
44 webcommands,
44 webcommands,
45 webutil,
45 webutil,
46 wsgicgi,
46 wsgicgi,
47 )
47 )
48
48
# Archive formats hgweb can offer, keyed by the short type name that is
# matched against the ``web.allow_archive`` list and the ``web.allow<type>``
# booleans (see requestcontext.archivelist()).
# NOTE(review): each value looks like (MIME type, archive kind, file
# extension, <None>). Only index 2, the file extension, is read in this
# module -- confirm the meaning of the other fields against their consumers.
archivespecs = util.sortdict((
    ('zip', ('application/zip', 'zip', '.zip', None)),
    ('gz', ('application/x-gzip', 'tgz', '.tar.gz', None)),
    ('bz2', ('application/x-bzip2', 'tbz2', '.tar.bz2', None)),
))
54
54
def getstyle(req, configfn, templatepath):
    """Work out which template style applies to a request.

    Three candidates are collected, in this order: the ``style`` query
    string parameter of ``req``, the ``web.style`` value obtained through
    ``configfn``, and the literal ``'paper'``.

    Returns a 2-tuple ``(candidates, resolved)`` where ``candidates`` is the
    tuple described above and ``resolved`` is whatever
    ``templater.stylemap()`` returns for those candidates and
    ``templatepath`` (callers in this module unpack it as
    ``(style, mapfile)``).
    """
    requested = req.qsparams.get('style', None)
    configured = configfn('web', 'style')
    candidates = (requested, configured, 'paper')
    resolved = templater.stylemap(candidates, templatepath)
    return candidates, resolved
62
62
def makebreadcrumb(url, prefix=''):
    '''Build the navigation "breadcrumb" for a URL path.

    One trailing slash is dropped from ``url`` and, when ``prefix`` is
    non-empty, ``'/' + prefix`` is prepended. The result is an iterator of
    ``{'url': ..., 'name': ...}`` dicts, one per path component, ordered
    from the outermost component to the innermost one. ``url`` is the path
    leading up to and including that component and ``name`` is the
    component itself.
    '''
    if url.endswith('/'):
        url = url[:-1]
    if prefix:
        url = '/' + prefix + url

    # Component names are taken from the path without its leading slash.
    relpath = url[1:] if url.startswith('/') else url

    crumbs = []
    current = url
    # Walk from the deepest component upwards, peeling one level off
    # ``current`` at each step. An empty component or an exhausted path
    # ends the walk.
    for name in reversed(relpath.split('/')):
        if not name or not current:
            break
        crumbs.append({'url': current, 'name': name})
        current = os.path.dirname(current)

    # Entries were collected innermost-first; hand them back outermost-first.
    return reversed(crumbs)
87
87
class requestcontext(object):
    """Per-request state for hgweb.

    A server may run several threads through the same WSGI application
    object, so state kept on the application is prone to races. Anything
    mutable that belongs to a single request lives on an instance of this
    class instead.
    """
    def __init__(self, app, repo):
        self.repo = repo
        self.reponame = app.reponame

        self.archivespecs = archivespecs

        # ``[web]`` settings read once when the context is created.
        self.maxchanges = self.configint('web', 'maxchanges')
        self.stripecount = self.configint('web', 'stripes')
        self.maxshortchanges = self.configint('web', 'maxshortchanges')
        self.maxfiles = self.configint('web', 'maxfiles')
        self.allowpull = self.configbool('web', 'allow-pull')

        # untrusted=False on purpose: otherwise a repo owner could point
        # web.templates in .hg/hgrc at any file readable by the user
        # running the CGI script.
        self.templatepath = self.config('web', 'templates', untrusted=False)

        # Costlier to build than a plain config value, so the table is
        # shared across requests. The app swaps in a new object when it is
        # updated and nothing should mutate the one referenced here, so it
        # stays constant for the lifetime of the request.
        self.websubtable = app.websubtable

        self.csp, self.nonce = cspvalues(self.repo.ui)

    # The accessors below trust settings from .hg/hgrc files by default
    # (untrusted=True).
    def config(self, section, name, default=uimod._unset, untrusted=True):
        ui = self.repo.ui
        return ui.config(section, name, default, untrusted=untrusted)

    def configbool(self, section, name, default=uimod._unset, untrusted=True):
        ui = self.repo.ui
        return ui.configbool(section, name, default, untrusted=untrusted)

    def configint(self, section, name, default=uimod._unset, untrusted=True):
        ui = self.repo.ui
        return ui.configint(section, name, default, untrusted=untrusted)

    def configlist(self, section, name, default=uimod._unset, untrusted=True):
        ui = self.repo.ui
        return ui.configlist(section, name, default, untrusted=untrusted)

    def archivelist(self, nodeid):
        """Yield one dict per archive type enabled for this repository.

        A type is enabled when it is listed in ``web.allow_archive`` or when
        the boolean ``web.allow<type>`` is set. Each yielded dict has the
        keys ``type``, ``extension`` and ``node`` (the given ``nodeid``).
        """
        allowed = self.configlist('web', 'allow_archive')
        for typ, spec in self.archivespecs.iteritems():
            enabled = typ in allowed or self.configbool('web',
                                                        'allow%s' % typ)
            if not enabled:
                continue
            yield {'type': typ, 'extension': spec[2], 'node': nodeid}

    def templater(self, req):
        """Create the templater used to render the response for ``req``.

        As a side effect ``self.reponame`` is filled in when it is still
        empty. Returns the object built by
        ``templater.templater.frommapfile()``.
        """
        logourl = self.config('web', 'logourl')
        logoimg = self.config('web', 'logoimg')

        # Static files are served from below the application path unless
        # web.staticurl says otherwise; either way the URL ends in '/'.
        staticurl = self.config('web', 'staticurl')
        if not staticurl:
            staticurl = req.apppath + '/static/'
        if not staticurl.endswith('/'):
            staticurl += '/'

        # Callable exposed to templates as the ``motd`` keyword.
        def motd(**kwargs):
            yield self.config('web', 'motd')

        # Pick the style. It is carried along in the session variables only
        # when the resolved style is the first candidate, i.e. the one taken
        # from the request's query string.
        styles, (style, mapfile) = getstyle(req, self.config,
                                            self.templatepath)
        stylevars = {}
        if style == styles[0]:
            stylevars['style'] = style
        sessionvars = webutil.sessionvars(stylevars, '?')

        # First non-empty value wins: web.name, the name supplied with the
        # request, the application path, and finally the repository root.
        if not self.reponame:
            self.reponame = (
                self.config('web', 'name', '')
                or req.reponame
                or req.apppath
                or self.repo.root
            )

        def websubfilter(text):
            return templatefilters.websub(text, self.websubtable)

        # TODO: export all keywords: defaults = templatekw.keywords.copy()
        defaults = {
            'url': req.apppath + '/',
            'logourl': logourl,
            'logoimg': logoimg,
            'staticurl': staticurl,
            'urlbase': req.advertisedbaseurl,
            'repo': self.reponame,
            'encoding': encoding.encoding,
            'motd': motd,
            'sessionvars': sessionvars,
            'pathdef': makebreadcrumb(req.apppath),
            'style': style,
            'nonce': self.nonce,
        }
        tres = formatter.templateresources(self.repo.ui, self.repo)
        return templater.templater.frommapfile(
            mapfile,
            filters={'websub': websubfilter},
            defaults=defaults,
            resources=tres)
199
200
200
201
class hgweb(object):
    """HTTP server for individual repositories.

    Instances of this class serve HTTP responses for a particular
    repository.

    Instances are typically used as WSGI applications.

    Some servers are multi-threaded. On these servers, there may
    be multiple active threads inside __call__.
    """
    def __init__(self, repo, name=None, baseui=None):
        """Create the application for one repository.

        ``repo`` is either a ``str`` (passed to ``hg.repository()`` together
        with a copy of ``baseui``, or a freshly loaded ui when ``baseui`` is
        not given) or an already constructed repository object, which is
        used as-is. ``name`` is stored as ``self.reponame``.
        """
        if isinstance(repo, str):
            if baseui:
                u = baseui.copy()
            else:
                u = uimod.ui.load()
            r = hg.repository(u, repo)
        else:
            # we trust caller to give us a private copy
            r = repo

        # Every override below is applied to both r.ui and r.baseui, with
        # 'hgweb' recorded as the source of the setting.
        r.ui.setconfig('ui', 'report_untrusted', 'off', 'hgweb')
        r.baseui.setconfig('ui', 'report_untrusted', 'off', 'hgweb')
        r.ui.setconfig('ui', 'nontty', 'true', 'hgweb')
        r.baseui.setconfig('ui', 'nontty', 'true', 'hgweb')
        # resolve file patterns relative to repo root
        r.ui.setconfig('ui', 'forcecwd', r.root, 'hgweb')
        r.baseui.setconfig('ui', 'forcecwd', r.root, 'hgweb')
        # Displaying a bundling progress bar while serving feels wrong and
        # may break some WSGI implementations.
        r.ui.setconfig('progress', 'disable', 'true', 'hgweb')
        r.baseui.setconfig('progress', 'disable', 'true', 'hgweb')
        # Stack of cachedlocalrepo instances handed out by _obtainrepo().
        self._repos = [hg.cachedlocalrepo(self._webifyrepo(r))]
        self._lastrepo = self._repos[0]
        hook.redirect(True)
        self.reponame = name

    def _webifyrepo(self, repo):
        """Return the web view of ``repo`` (see getwebview()).

        Also refreshes ``self.websubtable`` from the filtered repository.
        """
        repo = getwebview(repo)
        self.websubtable = webutil.getwebsubs(repo)
        return repo

    @contextlib.contextmanager
    def _obtainrepo(self):
        """Obtain a repo unique to the caller.

        Internally we maintain a stack of cachedlocalrepo instances
        to be handed out. If one is available, we pop it and return it,
        ensuring it is up to date in the process. If one is not available,
        we clone the most recently used repo instance and return it.

        It is currently possible for the stack to grow without bounds
        if the server allows infinite threads. However, servers should
        have a thread limit, thus establishing our limit.
        """
        if self._repos:
            cached = self._repos.pop()
            r, created = cached.fetch()
        else:
            cached = self._lastrepo.copy()
            r, created = cached.fetch()
        # fetch() reported that it produced a new repo object, so the web
        # view filter has to be applied to it again.
        if created:
            r = self._webifyrepo(r)

        self._lastrepo = cached
        # NOTE(review): mtime is not read in this class; presumably it is
        # consumed by caching(), which receives ``self`` -- confirm.
        self.mtime = cached.mtime
        try:
            yield r
        finally:
            # Put the instance back on the stack even if the request failed.
            self._repos.append(cached)

    def run(self):
        """Start a server from CGI environment.

        Modern servers should be using WSGI and should avoid this
        method, if possible.

        Raises RuntimeError when GATEWAY_INTERFACE in the environment does
        not start with "CGI/1.".
        """
        if not encoding.environ.get('GATEWAY_INTERFACE',
                                    '').startswith("CGI/1."):
            raise RuntimeError("This function is only intended to be "
                               "called while running as a CGI script.")
        wsgicgi.launch(self)

    def __call__(self, env, respond):
        """Run the WSGI application.

        This may be called by multiple threads.
        """
        req = requestmod.wsgirequest(env, respond)
        return self.run_wsgi(req)

    def run_wsgi(self, wsgireq):
        """Internal method to run the WSGI application.

        This is typically only called by Mercurial. External consumers
        should be using instances of this class as the WSGI application.
        """
        # This method is a generator: the repo stays checked out and the
        # profiler stays active while the caller consumes the response.
        with self._obtainrepo() as repo:
            profile = repo.ui.configbool('profiling', 'enabled')
            with profiling.profile(repo.ui, enabled=profile):
                for r in self._runwsgi(wsgireq, repo):
                    yield r

    def _runwsgi(self, wsgireq, repo):
        """Handle one request against ``repo`` and return the response body.

        Wire protocol requests are given to wireprotoserver first. Anything
        it does not handle is treated as a web interface request: the URL
        is translated into ``req.qsparams`` entries (cmd, style, node, file,
        type), a templater is created and the matching function in
        ``webcommands`` is called. Lookup, repository and ErrorResponse
        failures are rendered through the ``error`` template.
        """
        req = wsgireq.req
        res = wsgireq.res
        rctx = requestcontext(self, repo)

        # This state is global across all threads.
        encoding.encoding = rctx.config('web', 'encoding')
        rctx.repo.ui.environ = wsgireq.env

        if rctx.csp:
            # hgwebdir may have added CSP header. Since we generate our own,
            # replace it.
            wsgireq.headers = [h for h in wsgireq.headers
                               if h[0] != 'Content-Security-Policy']
            wsgireq.headers.append(('Content-Security-Policy', rctx.csp))
            res.headers['Content-Security-Policy'] = rctx.csp

        handled = wireprotoserver.handlewsgirequest(
            rctx, wsgireq, req, res, self.check_perm)
        if handled:
            return res.sendresponse()

        if req.havepathinfo:
            query = req.dispatchpath
        else:
            # No path info: use the query string up to the first '&' or ';'.
            query = req.querystring.partition('&')[0].partition(';')[0]

        # translate user-visible url structure to internal structure

        # At most three pieces: command, node, and the remaining file path.
        args = query.split('/', 2)
        if 'cmd' not in req.qsparams and args and args[0]:
            cmd = args.pop(0)
            # A leading "<style>-" on the command selects the style; the
            # split happens at the LAST '-'.
            style = cmd.rfind('-')
            if style != -1:
                req.qsparams['style'] = cmd[:style]
                cmd = cmd[style + 1:]

            # avoid accepting e.g. style parameter as command
            if util.safehasattr(webcommands, cmd):
                req.qsparams['cmd'] = cmd

            if cmd == 'static':
                req.qsparams['file'] = '/'.join(args)
            else:
                if args and args[0]:
                    # '%2F' in the node component stands for a literal '/'.
                    node = args.pop(0).replace('%2F', '/')
                    req.qsparams['node'] = node
                if args:
                    # A file given in the path replaces any 'file' query
                    # string parameter.
                    if 'file' in req.qsparams:
                        del req.qsparams['file']
                    for a in args:
                        req.qsparams.add('file', a)

            # 'rev' requests whose User-Agent contains 'mercurial' are
            # forced to the raw style.
            ua = req.headers.get('User-Agent', '')
            if cmd == 'rev' and 'mercurial' in ua:
                req.qsparams['style'] = 'raw'

            if cmd == 'archive':
                # The node arrives as "<node><extension>"; strip a known
                # archive extension and record the archive type it implies.
                fn = req.qsparams['node']
                for type_, spec in rctx.archivespecs.iteritems():
                    ext = spec[2]
                    if fn.endswith(ext):
                        req.qsparams['node'] = fn[:-len(ext)]
                        req.qsparams['type'] = type_
        else:
            cmd = req.qsparams.get('cmd', '')

        # process the web interface request

        # NOTE(review): ``tmpl`` and ``ctype`` are first bound inside the
        # ``try`` below, and every ``except`` handler uses them. If
        # rctx.templater(req) or the mimetype lookup itself raises one of
        # the caught exceptions, the handler would fail on an unbound
        # local -- confirm whether that can happen in practice.
        try:
            tmpl = rctx.templater(req)
            ctype = tmpl('mimetype', encoding=encoding.encoding)
            ctype = templater.stringify(ctype)

            # check read permissions non-static content
            if cmd != 'static':
                self.check_perm(rctx, wsgireq, None)

            # No command requested: use the template's 'default' entry.
            if cmd == '':
                req.qsparams['cmd'] = tmpl.cache['default']
                cmd = req.qsparams['cmd']

            # Don't enable caching if using a CSP nonce because then it
            # wouldn't be a nonce.
            if rctx.configbool('web', 'cache') and not rctx.nonce:
                caching(self, wsgireq) # sets ETag header or raises NOT_MODIFIED
            if cmd not in webcommands.__all__:
                msg = 'no such method: %s' % cmd
                raise ErrorResponse(HTTP_BAD_REQUEST, msg)
            elif cmd == 'file' and req.qsparams.get('style') == 'raw':
                # respond() is not called on this path; rawfile() is handed
                # the content type through rctx.ctype instead.
                rctx.ctype = ctype
                content = webcommands.rawfile(rctx, wsgireq, tmpl)
            else:
                content = getattr(webcommands, cmd)(rctx, wsgireq, tmpl)
                wsgireq.respond(HTTP_OK, ctype)

            return content

        except (error.LookupError, error.RepoLookupError) as err:
            wsgireq.respond(HTTP_NOT_FOUND, ctype)
            msg = pycompat.bytestr(err)
            # Errors carrying a 'name' (other than manifest lookups) get a
            # friendlier "revision not found" message.
            if (util.safehasattr(err, 'name') and
                not isinstance(err, error.ManifestLookupError)):
                msg = 'revision not found: %s' % err.name
            return tmpl('error', error=msg)
        except (error.RepoError, error.RevlogError) as inst:
            wsgireq.respond(HTTP_SERVER_ERROR, ctype)
            return tmpl('error', error=pycompat.bytestr(inst))
        except ErrorResponse as inst:
            wsgireq.respond(inst, ctype)
            if inst.code == HTTP_NOT_MODIFIED:
                # Not allowed to return a body on a 304
                return ['']
            return tmpl('error', error=pycompat.bytestr(inst))

    def check_perm(self, rctx, req, op):
        """Call every hook in ``permhooks`` with ``(rctx, req, op)``.

        Return values of the hooks are ignored.
        NOTE(review): presumably a hook denies access by raising (e.g.
        ErrorResponse) -- confirm against the hooks' definitions.
        """
        for permhook in permhooks:
            permhook(rctx, req, op)
423
424
def getwebview(repo):
    """Return the view of ``repo`` that hgweb should serve.

    The ``web.view`` config selects the changeset filter. Possible values
    are ``served``, ``visible`` and ``all``; the default is ``served``.
    The ``served`` filter only shows changesets that can be pulled from the
    hgweb instance. The ``visible`` filter also includes secret changesets
    but still excludes "hidden" ones. ``all`` returns the unfiltered
    repository. Any value that is not a known filter name falls back to
    ``served``.

    See the repoview module for details.

    The option has been around undocumented since Mercurial 2.5, but no
    user ever asked about it, so it is kept undocumented for now.
    """
    # experimental config: web.view
    viewconfig = repo.ui.config('web', 'view', untrusted=True)
    if viewconfig == 'all':
        return repo.unfiltered()

    if viewconfig in repoview.filtertable:
        filtername = viewconfig
    else:
        filtername = 'served'
    return repo.filtered(filtername)
@@ -1,547 +1,550 b''
1 # hgweb/request.py - An http request from either CGI or the standalone server.
1 # hgweb/request.py - An http request from either CGI or the standalone server.
2 #
2 #
3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import errno
11 import errno
12 import socket
12 import socket
13 import wsgiref.headers as wsgiheaders
13 import wsgiref.headers as wsgiheaders
14 #import wsgiref.validate
14 #import wsgiref.validate
15
15
16 from .common import (
16 from .common import (
17 ErrorResponse,
17 ErrorResponse,
18 HTTP_NOT_MODIFIED,
18 HTTP_NOT_MODIFIED,
19 statusmessage,
19 statusmessage,
20 )
20 )
21
21
22 from ..thirdparty import (
22 from ..thirdparty import (
23 attr,
23 attr,
24 )
24 )
25 from .. import (
25 from .. import (
26 error,
26 error,
27 pycompat,
27 pycompat,
28 util,
28 util,
29 )
29 )
30
30
class multidict(object):
    """Dict-like container that can hold several values per key.

    Used to store parsed request parameters. Insertion order of the
    individual (key, value) pairs is preserved.

    This is inspired by WebOb's class of the same name.
    """

    def __init__(self):
        # A flat list of (key, value) 2-tuples. Lookups are linear scans,
        # which is acceptable because request parameters are few. A dict
        # index can be layered on top later if this ever shows up in
        # profiles.
        self._items = []

    def __getitem__(self, key):
        """Return the most recently added value for ``key``.

        Raises KeyError if the key has no values.
        """
        matches = self.getall(key)
        if not matches:
            raise KeyError(key)

        return matches[-1]

    def __setitem__(self, key, value):
        """Drop every existing value for ``key`` and store ``value``."""
        self._items[:] = [pair for pair in self._items if pair[0] != key]
        self._items.append((key, value))

    def __delitem__(self, key):
        """Remove all values for ``key``.

        Raises KeyError if the key has no values.
        """
        survivors = [pair for pair in self._items if pair[0] != key]

        if len(survivors) == len(self._items):
            raise KeyError(key)

        # Slice assignment keeps the identity of the underlying list.
        self._items[:] = survivors

    def __contains__(self, key):
        for existing, _value in self._items:
            if existing == key:
                return True

        return False

    def __len__(self):
        # Counts (key, value) pairs, not distinct keys.
        return len(self._items)

    def get(self, key, default=None):
        """Return the last value for ``key`` or ``default`` if absent."""
        try:
            return self[key]
        except KeyError:
            return default

    def add(self, key, value):
        """Append a value for ``key`` while keeping existing values."""
        self._items.append((key, value))

    def getall(self, key):
        """Return a list of every value for ``key``, in insertion order."""
        return [value for existing, value in self._items if existing == key]

    def getone(self, key):
        """Return the only value for ``key``.

        Raises KeyError if the key is not defined or if it has multiple
        values set.
        """
        found = self.getall(key)

        if len(found) == 1:
            return found[0]

        if found:
            raise KeyError('multiple values for %r' % key)

        raise KeyError(key)

    def asdictoflists(self):
        """Return a plain dict mapping each key to a list of its values."""
        grouped = {}
        for key, value in self._items:
            grouped.setdefault(key, []).append(value)

        return grouped
114
114
@attr.s(frozen=True)
class parsedrequest(object):
    """Immutable record describing a parsed WSGI request.

    Holds the values derived from the WSGI environment plus a handle on
    the request body stream. Instances are built by
    ``parserequestfromenv()``.
    """

    # HTTP request method (value of REQUEST_METHOD).
    method = attr.ib()
    # Complete URL of this request, including path and query string.
    url = attr.ib()
    # URL with no path components: just <proto>://<host><port>.
    baseurl = attr.ib()
    # Advertised variants of ``url`` and ``baseurl``. These are built from
    # SERVER_NAME rather than the HTTP Host header. This is likely what
    # clients used.
    advertisedurl = attr.ib()
    advertisedbaseurl = attr.ib()
    # URL scheme (the part before ``://``), e.g. ``http`` or ``https``.
    urlscheme = attr.ib()
    # REMOTE_USER from the environment, or None when not set.
    remoteuser = attr.ib()
    # REMOTE_HOST from the environment, or None when not set.
    remotehost = attr.ib()
    # WSGI application path.
    apppath = attr.ib()
    # Path components (a list) used for dispatch.
    dispatchparts = attr.ib()
    # ``dispatchparts`` joined with "/": the URL path (no query string)
    # used for dispatch.
    dispatchpath = attr.ib()
    # Whether the request carried a path component at all. This can be
    # true while ``dispatchpath`` is empty due to REPO_NAME muckery.
    havepathinfo = attr.ib()
    # Name of the repository being accessed (REPO_NAME), or None.
    reponame = attr.ib()
    # Raw query string (everything after "?" in the URL).
    querystring = attr.ib()
    # multidict holding the parsed query string parameters.
    qsparams = attr.ib()
    # wsgiref.headers.Headers instance. Behaves like a dict with case
    # insensitive keys.
    headers = attr.ib()
    # File-like object for reading the request body.
    bodyfh = attr.ib()
156
158
def parserequestfromenv(env, bodyfh):
    """Parse URL components from environment variables.

    WSGI defines request attributes via environment variables. This function
    parses the environment variables into a ``parsedrequest``.

    ``env`` is the WSGI environment mapping. ``bodyfh`` is stored on the
    returned object as-is and is not read here.

    Raises ``KeyError`` if one of the keys read unconditionally
    (``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``,
    ``REQUEST_METHOD``) is missing. ``SCRIPT_NAME``, ``PATH_INFO``,
    ``QUERY_STRING`` and ``REPO_NAME`` are all optional.
    """
    # PEP-0333 defines the WSGI spec and is a useful reference for this code.

    # We first validate that the incoming object conforms with the WSGI spec.
    # We only want to be dealing with spec-conforming WSGI implementations.
    # TODO enable this once we fix internal violations.
    #wsgiref.validate.check_environ(env)

    # PEP-0333 states that environment keys and values are native strings
    # (bytes on Python 2 and str on Python 3). The code points for the Unicode
    # strings on Python 3 must be between \00000-\000FF. We deal with bytes
    # in Mercurial, so mass convert string keys and values to bytes.
    # NOTE(review): iteritems() on Python 3 presumably relies on Mercurial's
    # source transformer -- confirm before touching.
    if pycompat.ispy3:
        env = {k.encode('latin-1'): v for k, v in env.iteritems()}
        env = {k: v.encode('latin-1') if isinstance(v, str) else v
               for k, v in env.iteritems()}

    # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
    # the environment variables.
    # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
    # how URLs are reconstructed.
    urlscheme = env['wsgi.url_scheme']
    fullurl = urlscheme + '://'
    advertisedfullurl = fullurl

    def addport(s):
        # Only spell out the port when it isn't the scheme's default.
        defaultport = '443' if urlscheme == 'https' else '80'
        if env['SERVER_PORT'] != defaultport:
            s += ':' + env['SERVER_PORT']

        return s

    # The "real" URL prefers the Host header sent by the client; the
    # advertised URL always uses the server's own name.
    if env.get('HTTP_HOST'):
        fullurl += env['HTTP_HOST']
    else:
        fullurl += env['SERVER_NAME']
        fullurl = addport(fullurl)

    advertisedfullurl += env['SERVER_NAME']
    advertisedfullurl = addport(advertisedfullurl)

    baseurl = fullurl
    advertisedbaseurl = advertisedfullurl

    # The WSGI spec allows SCRIPT_NAME (like PATH_INFO and QUERY_STRING) to
    # be omitted when it would be empty, so never index it directly.
    scriptname = env.get('SCRIPT_NAME', '')
    querystring = env.get('QUERY_STRING', '')

    quotedpath = (util.urlreq.quote(scriptname) +
                  util.urlreq.quote(env.get('PATH_INFO', '')))
    fullurl += quotedpath
    advertisedfullurl += quotedpath

    if querystring:
        fullurl += '?' + querystring
        advertisedfullurl += '?' + querystring

    # When dispatching requests, we look at the URL components (PATH_INFO
    # and QUERY_STRING) after the application root (SCRIPT_NAME). But hgwebdir
    # has the concept of "virtual" repositories. This is defined via REPO_NAME.
    # If REPO_NAME is defined, we append it to SCRIPT_NAME to form a new app
    # root. We also exclude its path components from PATH_INFO when resolving
    # the dispatch path.

    apppath = scriptname
    reponame = env.get('REPO_NAME')

    if reponame:
        if not apppath.endswith('/'):
            apppath += '/'

        apppath += reponame

    if 'PATH_INFO' in env:
        dispatchparts = env['PATH_INFO'].strip('/').split('/')

        # Strip out repo parts.
        repoparts = env.get('REPO_NAME', '').split('/')
        if dispatchparts[:len(repoparts)] == repoparts:
            dispatchparts = dispatchparts[len(repoparts):]
    else:
        dispatchparts = []

    dispatchpath = '/'.join(dispatchparts)

    # A multidict keeps the parameters in the order they were parsed and
    # retains every value of a repeated key.
    qsparams = multidict()
    for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
        qsparams.add(k, v)

    # HTTP_* keys contain HTTP request headers. The Headers structure should
    # perform case normalization for us. We just rewrite underscore to dash
    # so keys match what likely went over the wire.
    headers = []
    for k, v in env.iteritems():
        if k.startswith('HTTP_'):
            headers.append((k[len('HTTP_'):].replace('_', '-'), v))

    headers = wsgiheaders.Headers(headers)

    # This is kind of a lie because the HTTP header wasn't explicitly
    # sent. But for all intents and purposes it should be OK to lie about
    # this, since a consumer will use either value to determine how many
    # bytes are available to read.
    if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
        headers['Content-Length'] = env['CONTENT_LENGTH']

    # TODO do this once we remove wsgirequest.inp, otherwise we could have
    # multiple readers from the underlying input stream.
    #bodyfh = env['wsgi.input']
    #if 'Content-Length' in headers:
    #    bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))

    return parsedrequest(method=env['REQUEST_METHOD'],
                         url=fullurl, baseurl=baseurl,
                         advertisedurl=advertisedfullurl,
                         advertisedbaseurl=advertisedbaseurl,
                         urlscheme=urlscheme,
                         remoteuser=env.get('REMOTE_USER'),
                         remotehost=env.get('REMOTE_HOST'),
                         apppath=apppath,
                         dispatchparts=dispatchparts, dispatchpath=dispatchpath,
                         havepathinfo='PATH_INFO' in env,
                         reponame=reponame,
                         querystring=querystring,
                         qsparams=qsparams,
                         headers=headers,
                         bodyfh=bodyfh)
289
292
class wsgiresponse(object):
    """Represents a response to a WSGI request.

    A response consists of a status line, headers, and a body.

    Consumers must populate the ``status`` and ``headers`` fields and
    make a call to a ``setbody*()`` method before the response can be
    issued.

    When it is time to start sending the response over the wire,
    ``sendresponse()`` is called. It handles emitting the header portion
    of the response message. It then yields chunks of body data to be
    written to the peer. Typically, the WSGI application itself calls
    and returns the value from ``sendresponse()``.
    """

    def __init__(self, req, startresponse):
        """Create an empty response tied to a specific request.

        ``req`` is a ``parsedrequest``. ``startresponse`` is the
        ``start_response`` function passed to the WSGI application.
        """
        self._req = req
        self._startresponse = startresponse

        self.status = None
        self.headers = wsgiheaders.Headers([])

        # Exactly one of these is set (to a non-None value) by setbody*().
        self._bodybytes = None
        self._bodygen = None
        self._started = False

    def setbodybytes(self, b):
        """Define the response body as static bytes.

        Also sets the ``Content-Length`` header to ``len(b)``.

        Raises ``error.ProgrammingError`` if a body was already defined.
        """
        if self._bodybytes is not None or self._bodygen is not None:
            raise error.ProgrammingError('cannot define body multiple times')

        self._bodybytes = b
        self.headers['Content-Length'] = '%d' % len(b)

    def setbodygen(self, gen):
        """Define the response body as a generator of bytes.

        Raises ``error.ProgrammingError`` if a body was already defined.
        """
        if self._bodybytes is not None or self._bodygen is not None:
            raise error.ProgrammingError('cannot define body multiple times')

        self._bodygen = gen

    def sendresponse(self):
        """Send the generated response to the client.

        Before this is called, ``status`` must be set and one of
        ``setbodybytes()`` or ``setbodygen()`` must be called.

        Calling this method multiple times is not allowed.

        This is a generator: nothing happens (including the checks below)
        until iteration starts. It yields the chunks of the response body.

        Raises ``error.ProgrammingError`` when called more than once, when
        ``status`` is unset, or when no body has been defined.
        """
        if self._started:
            raise error.ProgrammingError('sendresponse() called multiple times')

        self._started = True

        if not self.status:
            raise error.ProgrammingError('status line not defined')

        if self._bodybytes is None and self._bodygen is None:
            raise error.ProgrammingError('response body not defined')

        # Various HTTP clients (notably httplib) won't read the HTTP response
        # until the HTTP request has been sent in full. If servers (us) send a
        # response before the HTTP request has been fully sent, the connection
        # may deadlock because neither end is reading.
        #
        # We work around this by "draining" the request data before
        # sending any response in some conditions.
        drain = False
        close = False

        # If the client sent Expect: 100-continue, we assume it is smart enough
        # to deal with the server sending a response before reading the request.
        # (httplib doesn't do this.)
        if self._req.headers.get('Expect', '').lower() == '100-continue':
            pass
        # Only tend to request methods that have bodies. Strictly speaking,
        # we should sniff for a body. But this is fine for our existing
        # WSGI applications.
        elif self._req.method not in ('POST', 'PUT'):
            pass
        else:
            # If we don't know how much data to read, there's no guarantee
            # that we can drain the request responsibly. The WSGI
            # specification only says that servers *should* ensure the
            # input stream doesn't overrun the actual request. So there's
            # no guarantee that reading until EOF won't corrupt the stream
            # state.
            if not isinstance(self._req.bodyfh, util.cappedreader):
                close = True
            else:
                # We /could/ only drain certain HTTP response codes. But 200 and
                # non-200 wire protocol responses both require draining. Since
                # we have a capped reader in place for all situations where we
                # drain, it is safe to read from that stream. We'll either do
                # a drain or no-op if we're already at EOF.
                drain = True

        if close:
            self.headers['Connection'] = 'Close'

        if drain:
            assert isinstance(self._req.bodyfh, util.cappedreader)
            # Read and discard whatever is left of the request body.
            while True:
                chunk = self._req.bodyfh.read(32768)
                if not chunk:
                    break

        self._startresponse(pycompat.sysstr(self.status), self.headers.items())

        # Select the body source with ``is not None`` rather than
        # truthiness: an empty bytes body is a legitimately defined body and
        # must not be mistaken for "no body defined".
        if self._bodybytes is not None:
            # An empty body yields no chunks at all.
            if self._bodybytes:
                yield self._bodybytes
        elif self._bodygen is not None:
            for chunk in self._bodygen:
                yield chunk
        else:
            # Guarded against above; kept as a defensive check.
            raise error.ProgrammingError('do not know how to send body')
411
414
412 class wsgirequest(object):
415 class wsgirequest(object):
413 """Higher-level API for a WSGI request.
416 """Higher-level API for a WSGI request.
414
417
415 WSGI applications are invoked with 2 arguments. They are used to
418 WSGI applications are invoked with 2 arguments. They are used to
416 instantiate instances of this class, which provides higher-level APIs
419 instantiate instances of this class, which provides higher-level APIs
417 for obtaining request parameters, writing HTTP output, etc.
420 for obtaining request parameters, writing HTTP output, etc.
418 """
421 """
419 def __init__(self, wsgienv, start_response):
422 def __init__(self, wsgienv, start_response):
420 version = wsgienv[r'wsgi.version']
423 version = wsgienv[r'wsgi.version']
421 if (version < (1, 0)) or (version >= (2, 0)):
424 if (version < (1, 0)) or (version >= (2, 0)):
422 raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
425 raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
423 % version)
426 % version)
424
427
425 inp = wsgienv[r'wsgi.input']
428 inp = wsgienv[r'wsgi.input']
426
429
427 if r'HTTP_CONTENT_LENGTH' in wsgienv:
430 if r'HTTP_CONTENT_LENGTH' in wsgienv:
428 inp = util.cappedreader(inp, int(wsgienv[r'HTTP_CONTENT_LENGTH']))
431 inp = util.cappedreader(inp, int(wsgienv[r'HTTP_CONTENT_LENGTH']))
429 elif r'CONTENT_LENGTH' in wsgienv:
432 elif r'CONTENT_LENGTH' in wsgienv:
430 inp = util.cappedreader(inp, int(wsgienv[r'CONTENT_LENGTH']))
433 inp = util.cappedreader(inp, int(wsgienv[r'CONTENT_LENGTH']))
431
434
432 self.err = wsgienv[r'wsgi.errors']
435 self.err = wsgienv[r'wsgi.errors']
433 self.threaded = wsgienv[r'wsgi.multithread']
436 self.threaded = wsgienv[r'wsgi.multithread']
434 self.multiprocess = wsgienv[r'wsgi.multiprocess']
437 self.multiprocess = wsgienv[r'wsgi.multiprocess']
435 self.run_once = wsgienv[r'wsgi.run_once']
438 self.run_once = wsgienv[r'wsgi.run_once']
436 self.env = wsgienv
439 self.env = wsgienv
437 self.req = parserequestfromenv(wsgienv, inp)
440 self.req = parserequestfromenv(wsgienv, inp)
438 self.res = wsgiresponse(self.req, start_response)
441 self.res = wsgiresponse(self.req, start_response)
439 self._start_response = start_response
442 self._start_response = start_response
440 self.server_write = None
443 self.server_write = None
441 self.headers = []
444 self.headers = []
442
445
def respond(self, status, type, filename=None, body=None):
    """Start the WSGI response if it has not been started, then send ``body``.

    ``status`` may be an ``ErrorResponse``, an integer code, or an already
    formatted status value. ``type`` is the Content-Type value; it is
    converted with ``pycompat.sysstr`` when it is not a ``str``.

    When ``filename`` is truthy, a Content-Disposition header is emitted
    using only the part after the last ``/``, with backslashes and double
    quotes escaped. When ``body`` is not None, a Content-Length header is
    emitted and the body is written once the response has been started.

    Raises ``TypeError`` if any queued header value is not a ``str``.
    """
    if not isinstance(type, str):
        type = pycompat.sysstr(type)
    # ``_start_response`` is reset to None below, so this branch only runs
    # for the first call that starts the response.
    if self._start_response is not None:
        self.headers.append((r'Content-Type', type))
        if filename:
            # Keep only the final path component and escape the characters
            # that would break the quoted filename parameter.
            filename = (filename.rpartition('/')[-1]
                        .replace('\\', '\\\\').replace('"', '\\"'))
            self.headers.append(('Content-Disposition',
                                 'inline; filename="%s"' % filename))
        if body is not None:
            self.headers.append((r'Content-Length', str(len(body))))

        # Validate header values before anything is handed to the server.
        for k, v in self.headers:
            if not isinstance(v, str):
                raise TypeError('header value must be string: %r' % (v,))

        # Normalize ``status`` into a status line.
        if isinstance(status, ErrorResponse):
            self.headers.extend(status.headers)
            if status.code == HTTP_NOT_MODIFIED:
                # RFC 2616 Section 10.3.5: 304 Not Modified has cases where
                # it MUST NOT include any headers other than these and no
                # body
                self.headers = [(k, v) for (k, v) in self.headers if
                                k in ('Date', 'ETag', 'Expires',
                                      'Cache-Control', 'Vary')]
            status = statusmessage(status.code, pycompat.bytestr(status))
        elif status == 200:
            status = '200 Script output follows'
        elif isinstance(status, int):
            status = statusmessage(status)

        # Various HTTP clients (notably httplib) won't read the HTTP
        # response until the HTTP request has been sent in full. If servers
        # (us) send a response before the HTTP request has been fully sent,
        # the connection may deadlock because neither end is reading.
        #
        # We work around this by "draining" the request data before
        # sending any response in some conditions.
        drain = False
        close = False

        # If the client sent Expect: 100-continue, we assume it is smart
        # enough to deal with the server sending a response before reading
        # the request. (httplib doesn't do this.)
        if self.env.get(r'HTTP_EXPECT', r'').lower() == r'100-continue':
            pass
        # Only tend to request methods that have bodies. Strictly speaking,
        # we should sniff for a body. But this is fine for our existing
        # WSGI applications.
        elif self.env[r'REQUEST_METHOD'] not in (r'POST', r'PUT'):
            pass
        else:
            # If we don't know how much data to read, there's no guarantee
            # that we can drain the request responsibly. The WSGI
            # specification only says that servers *should* ensure the
            # input stream doesn't overrun the actual request. So there's
            # no guarantee that reading until EOF won't corrupt the stream
            # state.
            if not isinstance(self.req.bodyfh, util.cappedreader):
                close = True
            else:
                # We /could/ only drain certain HTTP response codes. But 200
                # and non-200 wire protocol responses both require draining.
                # Since we have a capped reader in place for all situations
                # where we drain, it is safe to read from that stream. We'll
                # either do a drain or no-op if we're already at EOF.
                drain = True

        if close:
            self.headers.append((r'Connection', r'Close'))

        if drain:
            assert isinstance(self.req.bodyfh, util.cappedreader)
            # Read and discard the remaining request body in 32 KiB chunks
            # until the reader reports EOF.
            while True:
                chunk = self.req.bodyfh.read(32768)
                if not chunk:
                    break

        # Hand the status line and headers to the server; the callable it
        # returns is what ``write`` uses to send data.
        self.server_write = self._start_response(
            pycompat.sysstr(status), self.headers)
        self._start_response = None
        self.headers = []
    if body is not None:
        self.write(body)
        # A response with an explicit body is complete; drop the writer.
        self.server_write = None
529
532
def write(self, thing):
    """Send ``thing`` to the client through ``self.server_write``.

    Falsy values (``None``, empty strings) are ignored. A connection reset
    by the peer is deliberately swallowed; any other socket error is
    re-raised to the caller.
    """
    if not thing:
        return
    try:
        self.server_write(thing)
    except socket.error as inst:
        # Exception objects are not subscriptable on Python 3, so the old
        # ``inst[0]`` raised TypeError there and masked the real error.
        # The ``errno`` attribute carries the code on Python 2.6+ and 3.
        if inst.errno != errno.ECONNRESET:
            raise
537
540
def flush(self):
    """Do nothing and return None."""
540
543
def wsgiapplication(app_maker):
    """Build an application with ``app_maker`` and wrap it as a WSGI callable.

    Kept for compatibility with old CGI scripts; a plain hgweb() or
    hgwebdir() can and should now be used as a WSGI application.

    ``app_maker`` is called once, immediately. The returned function
    forwards ``(env, respond)`` to the application it produced and returns
    that application's result.
    """
    wsgiapp = app_maker()

    def run_wsgi(env, respond):
        return wsgiapp(env, respond)

    return run_wsgi
General Comments 0
You need to be logged in to leave comments. Login now