##// END OF EJS Templates
hgweb: use separate repo instances per thread...
Gregory Szorc -
r26220:a43328ba default
parent child Browse files
Show More
@@ -1,411 +1,437
1 1 # hgweb/hgweb_mod.py - Web interface for a repository.
2 2 #
3 3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 import contextlib
9 10 import os
10 11 from mercurial import ui, hg, hook, error, encoding, templater, util, repoview
11 12 from mercurial.templatefilters import websub
12 13 from common import get_stat, ErrorResponse, permhooks, caching
13 14 from common import HTTP_OK, HTTP_NOT_MODIFIED, HTTP_BAD_REQUEST
14 15 from common import HTTP_NOT_FOUND, HTTP_SERVER_ERROR
15 16 from request import wsgirequest
16 17 import webcommands, protocol, webutil
17 18
# Maps a wire-protocol command name to the operation name ('pull' or
# 'push') that is handed to check_perm() before the command is dispatched.
perms = dict.fromkeys(
    (
        'changegroup',
        'changegroupsubset',
        'getbundle',
        'stream_out',
        'listkeys',
    ),
    'pull',
)
perms.update(dict.fromkeys(('unbundle', 'pushkey'), 'push'))
27 28
def makebreadcrumb(url, prefix=''):
    '''Build the navigation "breadcrumb" for a URL.

    One trailing '/' is dropped from ``url`` and, when ``prefix`` is
    non-empty, ``'/' + prefix`` is put in front of it. Every path
    component of the result then contributes one
    ``{'url': ..., 'name': ...}`` dict, where ``url`` is the path up to
    and including that component and ``name`` is the component itself.

    Returns a reverse iterator over the collected entries, i.e. the
    outermost path component comes first.
    '''
    if url.endswith('/'):
        url = url[:-1]
    if prefix:
        url = '/' + prefix + url
    relpath = url[1:] if url.startswith('/') else url

    crumbs = []
    current = url
    # Walk from the deepest component towards the root, peeling one
    # directory level off ``current`` per step; an empty component or an
    # exhausted URL ends the walk.
    for name in reversed(relpath.split('/')):
        if not (name and current):
            break
        crumbs.append({'url': current, 'name': name})
        current = os.path.dirname(current)
    return reversed(crumbs)
52 53
class requestcontext(object):
    """Per-request state container.

    The WSGI application object can be entered by several threads at
    once, so state kept on it is prone to races. Everything mutable that
    belongs to one request lives on an instance of this class instead.
    """

    # archive type -> tuple whose third item is the file extension used
    # by archivelist(); the remaining items are consumed outside this
    # class.
    archivespecs = {
        'bz2': ('application/x-bzip2', 'tbz2', '.tar.bz2', None),
        'gz': ('application/x-gzip', 'tgz', '.tar.gz', None),
        'zip': ('application/zip', 'zip', '.zip', None),
    }

    def __init__(self, app, repo):
        """Snapshot the settings of ``app`` and ``repo`` for one request."""
        self.repo = repo
        self.reponame = app.reponame

        self.archives = ('zip', 'gz', 'bz2')

        # Integer limits from the [web] section: (attribute, key, default).
        for attr, key, default in (
            ('maxchanges', 'maxchanges', 10),
            ('stripecount', 'stripes', 1),
            ('maxshortchanges', 'maxshortchanges', 60),
            ('maxfiles', 'maxfiles', 10),
        ):
            setattr(self, attr, self.configint('web', key, default))
        self.allowpull = self.configbool('web', 'allowpull', True)

        # untrusted=False on purpose: otherwise a repo owner could point
        # web.templates in .hg/hgrc at any file readable by the user
        # running the CGI script.
        self.templatepath = self.config('web', 'templates', untrusted=False)

        # Costlier to build than plain config values, hence shared across
        # requests. The app swaps in a new object when it changes; since
        # only a reference is held here and nothing should mutate the
        # object, it stays constant for the lifetime of the request.
        self.websubtable = app.websubtable

    # The accessors below trust settings from .hg/hgrc files by default.
    def config(self, section, name, default=None, untrusted=True):
        """Read a config value through the repo's ui."""
        repoui = self.repo.ui
        return repoui.config(section, name, default, untrusted=untrusted)

    def configbool(self, section, name, default=False, untrusted=True):
        """Read a boolean config value through the repo's ui."""
        repoui = self.repo.ui
        return repoui.configbool(section, name, default, untrusted=untrusted)

    def configint(self, section, name, default=None, untrusted=True):
        """Read an integer config value through the repo's ui."""
        repoui = self.repo.ui
        return repoui.configint(section, name, default, untrusted=untrusted)

    def configlist(self, section, name, default=None, untrusted=True):
        """Read a list config value through the repo's ui."""
        repoui = self.repo.ui
        return repoui.configlist(section, name, default, untrusted=untrusted)

    def archivelist(self, nodeid):
        """Yield one dict per archive type that is enabled for download.

        A type is enabled when it is named in ``web.allow_archive`` or
        when the boolean ``web.allow<type>`` is set.
        """
        enabled = self.configlist('web', 'allow_archive')
        for kind, spec in self.archivespecs.iteritems():
            if kind not in enabled and not self.configbool('web',
                                                           'allow' + kind):
                continue
            yield {'type': kind, 'extension': spec[2], 'node': nodeid}

    def templater(self, req):
        """Create the templater used to render the response to ``req``."""
        # Scheme, port and server name are needed to form absolute URLs.
        env = req.env
        if env.get('wsgi.url_scheme') == 'https':
            proto, default_port = 'https', '443'
        else:
            proto, default_port = 'http', '80'

        port = env['SERVER_PORT']
        # The port is only spelled out when it is not the scheme default.
        portsuffix = ':' + port if port != default_port else ''
        urlbase = '%s://%s%s' % (proto, env['SERVER_NAME'], portsuffix)
        logourl = self.config('web', 'logourl', 'http://mercurial.selenic.com/')
        logoimg = self.config('web', 'logoimg', 'hglogo.png')
        staticurl = self.config('web', 'staticurl') or req.url + 'static/'
        if not staticurl.endswith('/'):
            staticurl += '/'

        # Helper exposed to the templates.
        def motd(**tmplargs):
            yield self.config('web', 'motd', '')

        # Style candidates in priority order: request, config, fallback.
        styles = (
            req.form.get('style', [None])[0],
            self.config('web', 'style'),
            'paper',
        )
        style, mapfile = templater.stylemap(styles, self.templatepath)

        # Only a style that came from the request is carried along in the
        # session variables.
        sessionargs = {}
        if style == styles[0]:
            sessionargs['style'] = style

        start = '&' if req.url[-1] == '?' else '?'
        sessionvars = webutil.sessionvars(sessionargs, start)

        if not self.reponame:
            self.reponame = (self.config('web', 'name')
                             or req.env.get('REPO_NAME')
                             or req.url.strip('/') or self.repo.root)

        def websubfilter(text):
            return websub(text, self.websubtable)

        defaults = {
            'url': req.url,
            'logourl': logourl,
            'logoimg': logoimg,
            'staticurl': staticurl,
            'urlbase': urlbase,
            'repo': self.reponame,
            'encoding': encoding.encoding,
            'motd': motd,
            'sessionvars': sessionvars,
            'pathdef': makebreadcrumb(req.url),
            'style': style,
        }
        return templater.templater(mapfile,
                                   filters={'websub': websubfilter},
                                   defaults=defaults)
179 180
180 181
class hgweb(object):
    """HTTP server for individual repositories.

    Instances of this class serve HTTP responses for a particular
    repository.

    Instances are typically used as WSGI applications.

    Some servers are multi-threaded. On these servers, there may
    be multiple active threads inside __call__.
    """
    def __init__(self, repo, name=None, baseui=None):
        """Set up the application for ``repo``.

        ``repo`` is either a path (str), in which case the repository is
        opened here using a copy of ``baseui`` (or a fresh ui), or an
        already opened repository object. ``name`` is stored as the
        repository name used when rendering.
        """
        if isinstance(repo, str):
            if baseui:
                u = baseui.copy()
            else:
                u = ui.ui()
            r = hg.repository(u, repo)
        else:
            # we trust caller to give us a private copy
            r = repo

        r.ui.setconfig('ui', 'report_untrusted', 'off', 'hgweb')
        r.baseui.setconfig('ui', 'report_untrusted', 'off', 'hgweb')
        r.ui.setconfig('ui', 'nontty', 'true', 'hgweb')
        r.baseui.setconfig('ui', 'nontty', 'true', 'hgweb')
        # displaying a bundling progress bar while serving feels wrong and
        # may break some wsgi implementations.
        r.ui.setconfig('progress', 'disable', 'true', 'hgweb')
        r.baseui.setconfig('progress', 'disable', 'true', 'hgweb')
        # Pool of repo wrappers handed out by _obtainrepo(), seeded with
        # one entry; _lastrepo is the entry cloned when the pool is empty.
        self._repos = [hg.cachedlocalrepo(self._webifyrepo(r))]
        self._lastrepo = self._repos[0]
        hook.redirect(True)
        self.reponame = name

    def _webifyrepo(self, repo):
        """Return the web view of ``repo`` and refresh self.websubtable."""
        repo = getwebview(repo)
        self.websubtable = webutil.getwebsubs(repo)
        return repo

    @contextlib.contextmanager
    def _obtainrepo(self):
        """Obtain a repo unique to the caller.

        Internally we maintain a stack of cachedlocalrepo instances
        to be handed out. If one is available, we pop it and return it,
        ensuring it is up to date in the process. If one is not available,
        we clone the most recently used repo instance and return it.

        Whenever fetching reports that a new repo object was created, it
        is passed through _webifyrepo() before being handed out, no
        matter which of the two paths produced it.

        It is currently possible for the stack to grow without bounds
        if the server allows infinite threads. However, servers should
        have a thread limit, thus establishing our limit.
        """
        # Pop first and handle the empty case afterwards instead of
        # testing the list and then popping: with several threads in
        # here, another thread can empty the list between the test and
        # the pop.
        try:
            cached = self._repos.pop()
        except IndexError:
            cached = self._lastrepo.copy()

        r, created = cached.fetch()
        if created:
            r = self._webifyrepo(r)

        self._lastrepo = cached
        self.mtime = cached.mtime
        try:
            yield r
        finally:
            # Hand the wrapper back even if the request raised.
            self._repos.append(cached)

    def run(self):
        """Start a server from CGI environment.

        Modern servers should be using WSGI and should avoid this
        method, if possible.

        Raises RuntimeError when GATEWAY_INTERFACE does not indicate a
        CGI/1.x environment.
        """
        if not os.environ.get('GATEWAY_INTERFACE', '').startswith("CGI/1."):
            raise RuntimeError("This function is only intended to be "
                               "called while running as a CGI script.")
        import mercurial.hgweb.wsgicgi as wsgicgi
        wsgicgi.launch(self)

    def __call__(self, env, respond):
        """Run the WSGI application.

        This may be called by multiple threads.
        """
        req = wsgirequest(env, respond)
        return self.run_wsgi(req)

    def run_wsgi(self, req):
        """Internal method to run the WSGI application.

        This is typically only called by Mercurial. External consumers
        should be using instances of this class as the WSGI application.
        """
        # NOTE(review): the repo goes back to the pool as soon as this
        # ``with`` block exits. If _runwsgi() returns a lazily evaluated
        # iterable, the caller may still be consuming it after that
        # point -- confirm this is safe.
        with self._obtainrepo() as repo:
            return self._runwsgi(req, repo)

    def _runwsgi(self, req, repo):
        """Serve ``req`` using ``repo`` and return the response body.

        Wire-protocol commands are dispatched to the protocol module;
        everything else is mapped onto a webcommands function and
        rendered through the templater. Lookup errors, repository errors
        and ErrorResponse are turned into rendered error responses.
        """
        rctx = requestcontext(self, repo)

        # This state is global across all threads.
        encoding.encoding = rctx.config('web', 'encoding', encoding.encoding)
        rctx.repo.ui.environ = req.env

        # work with CGI variables to create coherent structure
        # use SCRIPT_NAME, PATH_INFO and QUERY_STRING as well as our REPO_NAME

        req.url = req.env['SCRIPT_NAME']
        if not req.url.endswith('/'):
            req.url += '/'
        if 'REPO_NAME' in req.env:
            req.url += req.env['REPO_NAME'] + '/'

        if 'PATH_INFO' in req.env:
            parts = req.env['PATH_INFO'].strip('/').split('/')
            repo_parts = req.env.get('REPO_NAME', '').split('/')
            if parts[:len(repo_parts)] == repo_parts:
                parts = parts[len(repo_parts):]
            query = '/'.join(parts)
        else:
            query = req.env['QUERY_STRING'].split('&', 1)[0]
            query = query.split(';', 1)[0]

        # process this if it's a protocol request
        # protocol bits don't need to create any URLs
        # and the clients always use the old URL structure

        cmd = req.form.get('cmd', [''])[0]
        if protocol.iscmd(cmd):
            try:
                if query:
                    raise ErrorResponse(HTTP_NOT_FOUND)
                if cmd in perms:
                    self.check_perm(rctx, req, perms[cmd])
                return protocol.call(rctx.repo, req, cmd)
            except ErrorResponse as inst:
                # A client that sends unbundle without 100-continue will
                # break if we respond early.
                # NOTE(review): ``and`` binds tighter than ``or``, so the
                # X-HgHttp2 test applies to every command, not only to
                # unbundle -- confirm this grouping is intended.
                if (cmd == 'unbundle' and
                    (req.env.get('HTTP_EXPECT',
                                 '').lower() != '100-continue') or
                    req.env.get('X-HgHttp2', '')):
                    req.drain()
                else:
                    req.headers.append(('Connection', 'Close'))
                req.respond(inst, protocol.HGTYPE,
                            body='0\n%s\n' % inst)
                return ''

        # translate user-visible url structure to internal structure

        args = query.split('/', 2)
        if 'cmd' not in req.form and args and args[0]:

            cmd = args.pop(0)
            # A command of the form "<style>-<cmd>" selects the style too.
            style = cmd.rfind('-')
            if style != -1:
                req.form['style'] = [cmd[:style]]
                cmd = cmd[style + 1:]

            # avoid accepting e.g. style parameter as command
            if util.safehasattr(webcommands, cmd):
                req.form['cmd'] = [cmd]

            if cmd == 'static':
                req.form['file'] = ['/'.join(args)]
            else:
                if args and args[0]:
                    node = args.pop(0).replace('%2F', '/')
                    req.form['node'] = [node]
                if args:
                    req.form['file'] = args

            ua = req.env.get('HTTP_USER_AGENT', '')
            if cmd == 'rev' and 'mercurial' in ua:
                req.form['style'] = ['raw']

            if cmd == 'archive':
                # Split a known archive extension off the node and use it
                # to select the archive type.
                fn = req.form['node'][0]
                for type_, spec in rctx.archivespecs.iteritems():
                    ext = spec[2]
                    if fn.endswith(ext):
                        req.form['node'] = [fn[:-len(ext)]]
                        req.form['type'] = [type_]

        # process the web interface request

        try:
            tmpl = rctx.templater(req)
            ctype = tmpl('mimetype', encoding=encoding.encoding)
            ctype = templater.stringify(ctype)

            # check read permissions non-static content
            if cmd != 'static':
                self.check_perm(rctx, req, None)

            if cmd == '':
                req.form['cmd'] = [tmpl.cache['default']]
                cmd = req.form['cmd'][0]

            if rctx.configbool('web', 'cache', True):
                caching(self, req) # sets ETag header or raises NOT_MODIFIED
            if cmd not in webcommands.__all__:
                msg = 'no such method: %s' % cmd
                raise ErrorResponse(HTTP_BAD_REQUEST, msg)
            elif cmd == 'file' and 'raw' in req.form.get('style', []):
                rctx.ctype = ctype
                content = webcommands.rawfile(rctx, req, tmpl)
            else:
                content = getattr(webcommands, cmd)(rctx, req, tmpl)
                req.respond(HTTP_OK, ctype)

            return content

        except (error.LookupError, error.RepoLookupError) as err:
            req.respond(HTTP_NOT_FOUND, ctype)
            msg = str(err)
            if (util.safehasattr(err, 'name') and
                not isinstance(err, error.ManifestLookupError)):
                msg = 'revision not found: %s' % err.name
            return tmpl('error', error=msg)
        except (error.RepoError, error.RevlogError) as inst:
            req.respond(HTTP_SERVER_ERROR, ctype)
            return tmpl('error', error=str(inst))
        except ErrorResponse as inst:
            req.respond(inst, ctype)
            if inst.code == HTTP_NOT_MODIFIED:
                # Not allowed to return a body on a 304
                return ['']
            return tmpl('error', error=str(inst))

    def check_perm(self, rctx, req, op):
        """Run every registered permission hook for operation ``op``."""
        for permhook in permhooks:
            permhook(rctx, req, op)
391 417
def getwebview(repo):
    """Return the view of ``repo`` that hgweb should serve.

    The ``web.view`` config option selects the changeset filter. The
    value ``all`` yields the unfiltered repository, and any name present
    in ``repoview.filtertable`` selects that filter. Anything else,
    including the default, falls back to the ``served`` filter.

    Per the original note on this option: ``served`` only shows
    changesets that can be pulled from the hgweb instance, while
    ``visible`` also includes secret changesets but still excludes
    "hidden" ones. See the repoview module for details.

    The option has existed, undocumented, since Mercurial 2.5; as no
    user ever asked about it, it is kept undocumented for now.
    """
    viewname = repo.ui.config('web', 'view', 'served', untrusted=True)
    if viewname == 'all':
        return repo.unfiltered()
    if viewname not in repoview.filtertable:
        viewname = 'served'
    return repo.filtered(viewname)
411 437
@@ -1,82 +1,83
1 1 Tests if hgweb can run without touching sys.stdin, as is required
2 2 by the WSGI standard and strictly implemented by mod_wsgi.
3 3
4 4 $ hg init repo
5 5 $ cd repo
6 6 $ echo foo > bar
7 7 $ hg add bar
8 8 $ hg commit -m "test"
9 9 $ cat > request.py <<EOF
10 10 > from mercurial import dispatch
11 11 > from mercurial.hgweb.hgweb_mod import hgweb
12 12 > from mercurial.ui import ui
13 13 > from mercurial import hg
14 14 > from StringIO import StringIO
15 15 > import os, sys
16 16 >
17 17 > class FileLike(object):
18 18 > def __init__(self, real):
19 19 > self.real = real
20 20 > def fileno(self):
21 21 > print >> sys.__stdout__, 'FILENO'
22 22 > return self.real.fileno()
23 23 > def read(self):
24 24 > print >> sys.__stdout__, 'READ'
25 25 > return self.real.read()
26 26 > def readline(self):
27 27 > print >> sys.__stdout__, 'READLINE'
28 28 > return self.real.readline()
29 29 >
30 30 > sys.stdin = FileLike(sys.stdin)
31 31 > errors = StringIO()
32 32 > input = StringIO()
33 33 > output = StringIO()
34 34 >
35 35 > def startrsp(status, headers):
36 36 > print '---- STATUS'
37 37 > print status
38 38 > print '---- HEADERS'
39 39 > print [i for i in headers if i[0] != 'ETag']
40 40 > print '---- DATA'
41 41 > return output.write
42 42 >
43 43 > env = {
44 44 > 'wsgi.version': (1, 0),
45 45 > 'wsgi.url_scheme': 'http',
46 46 > 'wsgi.errors': errors,
47 47 > 'wsgi.input': input,
48 48 > 'wsgi.multithread': False,
49 49 > 'wsgi.multiprocess': False,
50 50 > 'wsgi.run_once': False,
51 51 > 'REQUEST_METHOD': 'GET',
52 52 > 'SCRIPT_NAME': '',
53 53 > 'PATH_INFO': '',
54 54 > 'QUERY_STRING': '',
55 55 > 'SERVER_NAME': '127.0.0.1',
56 56 > 'SERVER_PORT': os.environ['HGPORT'],
57 57 > 'SERVER_PROTOCOL': 'HTTP/1.0'
58 58 > }
59 59 >
60 60 > i = hgweb('.')
61 61 > i(env, startrsp)
62 62 > print '---- ERRORS'
63 63 > print errors.getvalue()
64 64 > print '---- OS.ENVIRON wsgi variables'
65 65 > print sorted([x for x in os.environ if x.startswith('wsgi')])
66 66 > print '---- request.ENVIRON wsgi variables'
67 > print sorted([x for x in i._getrepo().ui.environ if x.startswith('wsgi')])
67 > with i._obtainrepo() as repo:
68 > print sorted([x for x in repo.ui.environ if x.startswith('wsgi')])
68 69 > EOF
69 70 $ python request.py
70 71 ---- STATUS
71 72 200 Script output follows
72 73 ---- HEADERS
73 74 [('Content-Type', 'text/html; charset=ascii')]
74 75 ---- DATA
75 76 ---- ERRORS
76 77
77 78 ---- OS.ENVIRON wsgi variables
78 79 []
79 80 ---- request.ENVIRON wsgi variables
80 81 ['wsgi.errors', 'wsgi.input', 'wsgi.multiprocess', 'wsgi.multithread', 'wsgi.run_once', 'wsgi.url_scheme', 'wsgi.version']
81 82
82 83 $ cd ..
General Comments 0
You need to be logged in to leave comments. Login now