##// END OF EJS Templates
hgweb: support constructing URLs from an alternate base URL...
Gregory Szorc -
r36916:219b2335 default
parent child Browse files
Show More
@@ -1,566 +1,529
1 1 # hgweb/hgwebdir_mod.py - Web interface for a directory of repositories.
2 2 #
3 3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import os
12 12 import re
13 13 import time
14 14
15 15 from ..i18n import _
16 16
17 17 from .common import (
18 18 ErrorResponse,
19 19 HTTP_NOT_FOUND,
20 20 HTTP_OK,
21 21 HTTP_SERVER_ERROR,
22 22 cspvalues,
23 23 get_contact,
24 24 get_mtime,
25 25 ismember,
26 26 paritygen,
27 27 staticfile,
28 28 )
29 29
30 30 from .. import (
31 31 configitems,
32 32 encoding,
33 33 error,
34 34 hg,
35 35 profiling,
36 pycompat,
37 36 scmutil,
38 37 templater,
39 38 ui as uimod,
40 39 util,
41 40 )
42 41
43 42 from . import (
44 43 hgweb_mod,
45 44 request as requestmod,
46 45 webutil,
47 46 wsgicgi,
48 47 )
49 48 from ..utils import dateutil
50 49
def cleannames(items):
    """Normalize the name half of ``(name, path)`` pairs.

    Each name is passed through ``util.pconvert()`` and has leading and
    trailing ``/`` removed. The path half is returned unchanged.
    """
    return [(util.pconvert(name).strip('/'), path) for name, path in items]
53 52
def findrepos(paths):
    """Expand ``(prefix, root)`` path entries into ``(urlname, path)`` pairs.

    Entries whose last path component is ``*`` or ``**`` are expanded by
    walking the parent directory for repositories. Any other entry is
    returned as a single repository after its name has been cleaned.
    """
    repos = []
    for prefix, root in cleannames(paths):
        roothead, roottail = os.path.split(root)
        # "foo = /bar/*" or "foo = /bar/**" lets every repo /bar/N in or below
        # /bar/ be served as foo/N .
        # '*' will not search inside dirs with .hg (except .hg/patches),
        # '**' will search inside dirs with .hg (and thus also find subrepos).
        try:
            recurse = {'*': False, '**': True}[roottail]
        except KeyError:
            # Not a wildcard entry: serve the path as given.
            repos.append((prefix, root))
            continue
        roothead = os.path.normpath(os.path.abspath(roothead))
        found = scmutil.walkrepos(roothead, followsym=True, recurse=recurse)
        repos.extend(urlrepos(prefix, roothead, found))
    return repos
71 70
def urlrepos(prefix, roothead, paths):
    """yield url paths and filesystem paths from a list of repo paths

    >>> conv = lambda seq: [(v, util.pconvert(p)) for v,p in seq]
    >>> conv(urlrepos(b'hg', b'/opt', [b'/opt/r', b'/opt/r/r', b'/opt']))
    [('hg/r', '/opt/r'), ('hg/r/r', '/opt/r/r'), ('hg', '/opt')]
    >>> conv(urlrepos(b'', b'/opt', [b'/opt/r', b'/opt/r/r', b'/opt']))
    [('r', '/opt/r'), ('r/r', '/opt/r/r'), ('', '/opt')]
    """
    for path in paths:
        path = os.path.normpath(path)
        # The URL name is the prefix joined with the part of the path that
        # follows ``roothead``, without any leading or trailing slash.
        yield (prefix + '/' +
               util.pconvert(path[len(roothead):]).lstrip('/')).strip('/'), path
85 84
def geturlcgivars(baseurl, port):
    """
    Extract CGI variables from baseurl

    Returns a ``(name, port, path)`` tuple. ``port`` is the fallback used
    when ``baseurl`` carries no port of its own, and the returned path
    always begins with ``/``.

    >>> geturlcgivars(b"http://host.org/base", b"80")
    ('host.org', '80', '/base')
    >>> geturlcgivars(b"http://host.org:8000/base", b"80")
    ('host.org', '8000', '/base')
    >>> geturlcgivars(b'/base', 8000)
    ('', '8000', '/base')
    >>> geturlcgivars(b"base", b'8000')
    ('', '8000', '/base')
    >>> geturlcgivars(b"http://host", b'8000')
    ('host', '8000', '/')
    >>> geturlcgivars(b"http://host/", b'8000')
    ('host', '8000', '/')
    """
    u = util.url(baseurl)
    name = u.host or ''
    if u.port:
        port = u.port
    path = u.path or ""
    if not path.startswith('/'):
        path = '/' + path

    return name, pycompat.bytestr(port), path
112
def readallowed(ui, req):
    """Check allow_read and deny_read config options of a repo's ui object
    to determine user permissions.  By default, with neither option set (or
    both empty), allow all users to read the repo.  There are two ways a
    user can be denied read access:  (1) deny_read is not empty, and the
    user is unauthenticated or deny_read contains user (or *), and (2)
    allow_read is not empty and the user is not in allow_read.  Return True
    if user is allowed to read the repo, else return False."""

    user = req.remoteuser

    # deny_read is checked first, so a deny entry wins over allow_read.
    deny_read = ui.configlist('web', 'deny_read', untrusted=True)
    if deny_read and (not user or ismember(ui, user, deny_read)):
        return False

    allow_read = ui.configlist('web', 'allow_read', untrusted=True)
    # by default, allow reading if no allow_read option has been set
    if not allow_read or ismember(ui, user, allow_read):
        return True

    return False
134 106
def archivelist(ui, nodeid, url):
    """Return a list of dicts describing the enabled archive types.

    An archive type from ``hgweb_mod.archivespecs`` is enabled when it is
    listed in ``web.allow_archive`` or when the boolean option
    ``web.allow<type>`` is set. Each dict has the keys ``type``,
    ``extension``, ``node`` and ``url``.
    """
    allowed = ui.configlist('web', 'allow_archive', untrusted=True)
    archives = []

    for typ, spec in hgweb_mod.archivespecs.iteritems():
        if typ in allowed or ui.configbool('web', 'allow' + typ,
                                           untrusted=True):
            archives.append({
                'type': typ,
                'extension': spec[2],
                'node': nodeid,
                'url': url,
            })

    return archives
150 122
def rawindexentries(ui, repos, wsgireq, req, subdir=''):
    """Yield one row dict per entry to show on a repository index page.

    ``repos`` is an iterable of ``(name, path)`` pairs. Only names that start
    with ``subdir`` are considered. Depending on ``web.descend`` and
    ``web.collapse``, nested names are skipped, listed as repositories, or
    collapsed into a single directory row. Repositories that are hidden,
    unreadable for the requesting user, or cannot be opened are skipped.
    """
    descend = ui.configbool('web', 'descend')
    collapse = ui.configbool('web', 'collapse')
    seenrepos = set()
    seendirs = set()
    for name, path in repos:

        if not name.startswith(subdir):
            continue
        name = name[len(subdir):]
        directory = False

        if '/' in name:
            if not descend:
                continue

            nameparts = name.split('/')
            rootname = nameparts[0]

            if not collapse:
                pass
            elif rootname in seendirs:
                continue
            elif rootname in seenrepos:
                pass
            else:
                directory = True
                name = rootname

                # redefine the path to refer to the directory
                discarded = '/'.join(nameparts[1:])

                # remove name parts plus accompanying slash
                path = path[:-len(discarded) - 1]

                # If the collapsed directory is itself a repository, list it
                # as a repository rather than as a directory.
                try:
                    hg.repository(ui, path)
                    directory = False
                except (IOError, error.RepoError):
                    pass

        parts = [name]
        parts.insert(0, '/' + subdir.rstrip('/'))
        if wsgireq.env['SCRIPT_NAME']:
            parts.insert(0, wsgireq.env['SCRIPT_NAME'])
        # Collapse any run of slashes produced by the join above.
        url = re.sub(r'/+', '/', '/'.join(parts) + '/')

        # show either a directory entry or a repository
        if directory:
            # get the directory's time information
            try:
                d = (get_mtime(path), dateutil.makedate()[1])
            except OSError:
                continue

            # add '/' to the name to make it obvious that
            # the entry is a directory, not a regular repository
            row = {'contact': "",
                   'contact_sort': "",
                   'name': name + '/',
                   'name_sort': name,
                   'url': url,
                   'description': "",
                   'description_sort': "",
                   'lastchange': d,
                   'lastchange_sort': d[1] - d[0],
                   'archives': [],
                   'isdirectory': True,
                   'labels': [],
                   }

            seendirs.add(name)
            yield row
            continue

        u = ui.copy()
        try:
            u.readconfig(os.path.join(path, '.hg', 'hgrc'))
        except Exception as e:
            u.warn(_('error reading %s/.hg/hgrc: %s\n') % (path, e))
            continue

        def get(section, name, default=uimod._unset):
            return u.config(section, name, default, untrusted=True)

        if u.configbool("web", "hidden", untrusted=True):
            continue

        if not readallowed(u, req):
            continue

        # update time with local timezone
        try:
            r = hg.repository(ui, path)
        except (IOError, error.RepoError):
            u.warn(_('error accessing repository at %s\n') % path)
            continue
        try:
            d = (get_mtime(r.spath), dateutil.makedate()[1])
        except OSError:
            continue

        contact = get_contact(get)
        description = get("web", "description")
        # Record the URL name before ``web.name`` may replace it for display.
        seenrepos.add(name)
        name = get("web", "name", name)
        row = {'contact': contact or "unknown",
               'contact_sort': contact.upper() or "unknown",
               'name': name,
               'name_sort': name,
               'url': url,
               'description': description or "unknown",
               'description_sort': description.upper() or "unknown",
               'lastchange': d,
               'lastchange_sort': d[1] - d[0],
               'archives': archivelist(u, "tip", url),
               'isdirectory': None,
               'labels': u.configlist('web', 'labels', untrusted=True),
               }

        yield row
275 247
def indexentries(ui, repos, wsgireq, req, stripecount, sortcolumn='',
                 descending=False, subdir=''):
    """Yield index rows, optionally sorted, each tagged with a parity.

    Rows come from ``rawindexentries()``. When ``sortcolumn`` is set they are
    sorted on the row's ``<sortcolumn>_sort`` key, reversed if ``descending``.
    Each yielded row gains a ``parity`` key taken from
    ``paritygen(stripecount)``.
    """

    rows = rawindexentries(ui, repos, wsgireq, req, subdir=subdir)

    sortdefault = None, False

    if sortcolumn and sortdefault != (sortcolumn, descending):
        sortkey = '%s_sort' % sortcolumn
        rows = sorted(rows, key=lambda x: x[sortkey],
                      reverse=descending)

    for row, parity in zip(rows, paritygen(stripecount)):
        row['parity'] = parity
        yield row
291 263
class hgwebdir(object):
    """HTTP server for multiple repositories.

    Given a configuration, different repositories will be served depending
    on the request path.

    Instances are typically used as WSGI applications.
    """
    def __init__(self, conf, baseui=None):
        # ``conf`` is either a path to a config file or a dict/list/tuple of
        # (name, path) entries; see refresh() for how each form is read.
        self.conf = conf
        self.baseui = baseui
        self.ui = None
        self.lastrefresh = 0
        self.motd = None
        self.refresh()

    def refresh(self):
        """Reload configuration and the repository list if they are stale.

        Does nothing when ``web.refreshinterval`` seconds have not yet
        elapsed since the previous refresh.
        """
        if self.ui:
            refreshinterval = self.ui.configint('web', 'refreshinterval')
        else:
            # No ui yet (first call): fall back to the registered default.
            item = configitems.coreitems['web']['refreshinterval']
            refreshinterval = item.default

        # refreshinterval <= 0 means to always refresh.
        if (refreshinterval > 0 and
            self.lastrefresh + refreshinterval > time.time()):
            return

        # NOTE(review): nesting of the setconfig() calls under ``else`` was
        # reconstructed from a listing without indentation - confirm.
        if self.baseui:
            u = self.baseui.copy()
        else:
            u = uimod.ui.load()
            u.setconfig('ui', 'report_untrusted', 'off', 'hgwebdir')
            u.setconfig('ui', 'nontty', 'true', 'hgwebdir')
            # displaying bundling progress bar while serving feels wrong and may
            # break some wsgi implementations.
            u.setconfig('progress', 'disable', 'true', 'hgweb')

        if not isinstance(self.conf, (dict, list, tuple)):
            # ``conf`` names a config file; its [paths] section is read into
            # a private 'hgweb-paths' section.
            remap = {'paths': 'hgweb-paths'}
            if not os.path.exists(self.conf):
                raise error.Abort(_('config file %s not found!') % self.conf)
            u.readconfig(self.conf, remap=remap, trust=True)
            paths = []
            for name, ignored in u.configitems('hgweb-paths'):
                for path in u.configlist('hgweb-paths', name):
                    paths.append((name, path))
        elif isinstance(self.conf, (list, tuple)):
            paths = self.conf
        elif isinstance(self.conf, dict):
            paths = self.conf.items()

        repos = findrepos(paths)
        for prefix, root in u.configitems('collections'):
            prefix = util.pconvert(prefix)
            for path in scmutil.walkrepos(root, followsym=True):
                repo = os.path.normpath(path)
                name = util.pconvert(repo)
                if name.startswith(prefix):
                    name = name[len(prefix):]
                repos.append((name.lstrip('/'), repo))

        self.repos = repos
        self.ui = u
        encoding.encoding = self.ui.config('web', 'encoding')
        self.style = self.ui.config('web', 'style')
        self.templatepath = self.ui.config('web', 'templates', untrusted=False)
        self.stripecount = self.ui.config('web', 'stripes')
        if self.stripecount:
            self.stripecount = int(self.stripecount)
        prefix = self.ui.config('web', 'prefix')
        if prefix.startswith('/'):
            prefix = prefix[1:]
        if prefix.endswith('/'):
            prefix = prefix[:-1]
        self.prefix = prefix
        self.lastrefresh = time.time()

    def run(self):
        """Serve a single request as a CGI script.

        Raises RuntimeError when GATEWAY_INTERFACE does not indicate CGI/1.x.
        """
        if not encoding.environ.get('GATEWAY_INTERFACE',
                                    '').startswith("CGI/1."):
            raise RuntimeError("This function is only intended to be "
                               "called while running as a CGI script.")
        wsgicgi.launch(self)

    def __call__(self, env, respond):
        # WSGI entry point. ``web.baseurl`` is handed to the request object
        # as the alternate base URL.
        baseurl = self.ui.config('web', 'baseurl')
        wsgireq = requestmod.wsgirequest(env, respond, altbaseurl=baseurl)
        return self.run_wsgi(wsgireq)

    def run_wsgi(self, wsgireq):
        """Process the request, under the profiler if profiling is enabled."""
        profile = self.ui.configbool('profiling', 'enabled')
        with profiling.profile(self.ui, enabled=profile):
            for r in self._runwsgi(wsgireq):
                yield r

    def _runwsgi(self, wsgireq):
        """Dispatch to a static file, an index page, or a repository."""
        req = wsgireq.req
        res = wsgireq.res

        try:
            self.refresh()

            csp, nonce = cspvalues(self.ui)
            if csp:
                res.headers['Content-Security-Policy'] = csp
                wsgireq.headers.append(('Content-Security-Policy', csp))

            virtual = wsgireq.env.get("PATH_INFO", "").strip('/')
            tmpl = self.templater(wsgireq, nonce)
            ctype = tmpl('mimetype', encoding=encoding.encoding)
            ctype = templater.stringify(ctype)

            # Global defaults. These can be overridden by any handler.
            res.status = '200 Script output follows'
            res.headers['Content-Type'] = ctype

            # a static file
            if virtual.startswith('static/') or 'static' in req.qsparams:
                if virtual.startswith('static/'):
                    fname = virtual[7:]
                else:
                    fname = req.qsparams['static']
                static = self.ui.config("web", "static", None,
                                        untrusted=False)
                if not static:
                    tp = self.templatepath or templater.templatepaths()
                    if isinstance(tp, str):
                        tp = [tp]
                    static = [os.path.join(p, 'static') for p in tp]

                staticfile(static, fname, res)
                return res.sendresponse()

            # top-level index

            repos = dict(self.repos)

            if (not virtual or virtual == 'index') and virtual not in repos:
                wsgireq.respond(HTTP_OK, ctype)
                return self.makeindex(wsgireq, tmpl)

            # nested indexes and hgwebs

            if virtual.endswith('/index') and virtual not in repos:
                subdir = virtual[:-len('index')]
                if any(r.startswith(subdir) for r in repos):
                    wsgireq.respond(HTTP_OK, ctype)
                    return self.makeindex(wsgireq, tmpl, subdir)

            def _virtualdirs():
                # Check the full virtual path, each parent, and the root ('')
                if virtual != '':
                    yield virtual

                    for p in util.finddirs(virtual):
                        yield p

                yield ''

            for virtualrepo in _virtualdirs():
                real = repos.get(virtualrepo)
                if real:
                    # Re-parse the WSGI environment to take into account our
                    # repository path component.
                    wsgireq.req = requestmod.parserequestfromenv(
                        wsgireq.env, wsgireq.req.bodyfh, reponame=virtualrepo,
                        altbaseurl=self.ui.config('web', 'baseurl'))
                    try:
                        # ensure caller gets private copy of ui
                        repo = hg.repository(self.ui.copy(), real)
                        return hgweb_mod.hgweb(repo).run_wsgi(wsgireq)
                    except IOError as inst:
                        msg = encoding.strtolocal(inst.strerror)
                        raise ErrorResponse(HTTP_SERVER_ERROR, msg)
                    except error.RepoError as inst:
                        raise ErrorResponse(HTTP_SERVER_ERROR, bytes(inst))

            # browse subdirectories
            subdir = virtual + '/'
            if any(r.startswith(subdir) for r in repos):
                wsgireq.respond(HTTP_OK, ctype)
                return self.makeindex(wsgireq, tmpl, subdir)

            # prefixes not found
            wsgireq.respond(HTTP_NOT_FOUND, ctype)
            return tmpl("notfound", repo=virtual)

        except ErrorResponse as err:
            wsgireq.respond(err, ctype)
            return tmpl('error', error=err.message or '')
        finally:
            tmpl = None

    def makeindex(self, wsgireq, tmpl, subdir=""):
        """Render the "index" template for the repositories under ``subdir``.

        The ``sort`` query parameter selects the sort column; a leading ``-``
        requests descending order. Unknown column names disable sorting.
        """
        req = wsgireq.req

        self.refresh()
        sortable = ["name", "description", "contact", "lastchange"]
        sortcolumn, descending = None, False
        if 'sort' in req.qsparams:
            sortcolumn = req.qsparams['sort']
            descending = sortcolumn.startswith('-')
            if descending:
                sortcolumn = sortcolumn[1:]
            if sortcolumn not in sortable:
                sortcolumn = ""

        # For each column, the value to put in a "sort by this column" link:
        # the active ascending column gets a "-" prefix so the link flips it.
        sort = [("sort_%s" % column,
                 "%s%s" % ((not descending and column == sortcolumn)
                           and "-" or "", column))
                for column in sortable]

        self.refresh()

        entries = indexentries(self.ui, self.repos, wsgireq, req,
                               self.stripecount, sortcolumn=sortcolumn,
                               descending=descending, subdir=subdir)

        return tmpl("index", entries=entries, subdir=subdir,
                    pathdef=hgweb_mod.makebreadcrumb('/' + subdir, self.prefix),
                    sortcolumn=sortcolumn, descending=descending,
                    **dict(sort))

    def templater(self, wsgireq, nonce):
        """Build the templater used to render index and error pages."""

        def motd(**map):
            if self.motd is not None:
                yield self.motd
            else:
                yield config('web', 'motd')

        def config(section, name, default=uimod._unset, untrusted=True):
            return self.ui.config(section, name, default, untrusted)

        url = wsgireq.env.get('SCRIPT_NAME', '')
        if not url.endswith('/'):
            url += '/'

        vars = {}
        styles, (style, mapfile) = hgweb_mod.getstyle(wsgireq.req, config,
                                                      self.templatepath)
        if style == styles[0]:
            vars['style'] = style

        sessionvars = webutil.sessionvars(vars, r'?')
        logourl = config('web', 'logourl')
        logoimg = config('web', 'logoimg')
        staticurl = (config('web', 'staticurl')
                     or wsgireq.req.apppath + '/static/')
        if not staticurl.endswith('/'):
            staticurl += '/'

        defaults = {
            "encoding": encoding.encoding,
            "motd": motd,
            "url": url,
            "logourl": logourl,
            "logoimg": logoimg,
            "staticurl": staticurl,
            "sessionvars": sessionvars,
            "style": style,
            "nonce": nonce,
        }
        tmpl = templater.templater.frommapfile(mapfile, defaults=defaults)
        return tmpl
@@ -1,669 +1,713
1 1 # hgweb/request.py - An http request from either CGI or the standalone server.
2 2 #
3 3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import errno
12 12 import socket
13 13 import wsgiref.headers as wsgiheaders
14 14 #import wsgiref.validate
15 15
16 16 from .common import (
17 17 ErrorResponse,
18 18 statusmessage,
19 19 )
20 20
21 21 from ..thirdparty import (
22 22 attr,
23 23 )
24 24 from .. import (
25 25 error,
26 26 pycompat,
27 27 util,
28 28 )
29 29
class multidict(object):
    """A dict like object that can store multiple values for a key.

    Used to store parsed request parameters.

    This is inspired by WebOb's class of the same name.
    """
    def __init__(self):
        # Stores (key, value) 2-tuples. This isn't the most efficient. But we
        # don't rely on parameters that much, so it shouldn't be a perf issue.
        # we can always add dict for fast lookups.
        self._items = []

    def __getitem__(self, key):
        """Returns the last set value for a key."""
        for k, v in reversed(self._items):
            if k == key:
                return v

        raise KeyError(key)

    def __setitem__(self, key, value):
        """Replace all values for a key with a new value."""
        try:
            del self[key]
        except KeyError:
            pass

        self._items.append((key, value))

    def __delitem__(self, key):
        """Delete all values for a key."""
        oldlen = len(self._items)

        self._items[:] = [(k, v) for k, v in self._items if k != key]

        # Nothing was removed, so the key was never present.
        if oldlen == len(self._items):
            raise KeyError(key)

    def __contains__(self, key):
        return any(k == key for k, v in self._items)

    def __len__(self):
        # Counts stored (key, value) pairs, not distinct keys.
        return len(self._items)

    def get(self, key, default=None):
        try:
            return self.__getitem__(key)
        except KeyError:
            return default

    def add(self, key, value):
        """Add a new value for a key. Does not replace existing values."""
        self._items.append((key, value))

    def getall(self, key):
        """Obtains all values for a key."""
        return [v for k, v in self._items if k == key]

    def getone(self, key):
        """Obtain a single value for a key.

        Raises KeyError if key not defined or it has multiple values set.
        """
        vals = self.getall(key)

        if not vals:
            raise KeyError(key)

        if len(vals) > 1:
            raise KeyError('multiple values for %r' % key)

        return vals[0]

    def asdictoflists(self):
        """Return a plain dict mapping each key to a list of its values."""
        d = {}
        for k, v in self._items:
            if k in d:
                d[k].append(v)
            else:
                d[k] = [v]

        return d
113 113
@attr.s(frozen=True)
class parsedrequest(object):
    """Represents a parsed WSGI request.

    Contains both parsed parameters as well as a handle on the input stream.
    """

    # Request method.
    method = attr.ib()
    # Full URL for this request.
    url = attr.ib()
    # URL without any path components. Just <proto>://<host><port>.
    baseurl = attr.ib()
    # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
    # of HTTP: Host header for hostname. This is likely what clients used.
    advertisedurl = attr.ib()
    advertisedbaseurl = attr.ib()
    # URL scheme (part before ``://``). e.g. ``http`` or ``https``.
    urlscheme = attr.ib()
    # Value of REMOTE_USER, if set, or None.
    remoteuser = attr.ib()
    # Value of REMOTE_HOST, if set, or None.
    remotehost = attr.ib()
    # Relative WSGI application path. If defined, will begin with a
    # ``/``.
    apppath = attr.ib()
    # List of path parts to be used for dispatch.
    dispatchparts = attr.ib()
    # URL path component (no query string) used for dispatch. Can be
    # ``None`` to signal no path component given to the request, an
    # empty string to signal a request to the application's root URL,
    # or a string not beginning with ``/`` containing the requested
    # path under the application.
    dispatchpath = attr.ib()
    # The name of the repository being accessed.
    reponame = attr.ib()
    # Raw query string (part after "?" in URL).
    querystring = attr.ib()
    # multidict of query string parameters.
    qsparams = attr.ib()
    # wsgiref.headers.Headers instance. Operates like a dict with case
    # insensitive keys.
    headers = attr.ib()
    # Request body input stream.
    bodyfh = attr.ib()
159 159
def parserequestfromenv(env, bodyfh, reponame=None, altbaseurl=None):
    """Parse URL components from environment variables.

    WSGI defines request attributes via environment variables. This function
    parses the environment variables into a data structure.

    If ``reponame`` is defined, the leading path components matching that
    string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
    This simulates the world view of a WSGI application that processes
    requests from the base URL of a repo.

    If ``altbaseurl`` (typically comes from ``web.baseurl`` config option)
    is defined, it is used - instead of the WSGI environment variables - for
    constructing URL components up to and including the WSGI application path.
    For example, if the current WSGI application is at ``/repo`` and a request
    is made to ``/rev/@`` with this argument set to
    ``http://myserver:9000/prefix``, the URL and path components will resolve as
    if the request were to ``http://myserver:9000/prefix/rev/@``. In other
    words, ``wsgi.url_scheme``, ``SERVER_NAME``, ``SERVER_PORT``, and
    ``SCRIPT_NAME`` are all effectively replaced by components from this URL.

    Returns a ``parsedrequest`` instance. Raises ``error.ProgrammingError``
    when ``reponame`` is given but is not a path-delimited prefix of
    ``PATH_INFO``.
    """
    # PEP 3333 defines the WSGI spec and is a useful reference for this code.

    # We first validate that the incoming object conforms with the WSGI spec.
    # We only want to be dealing with spec-conforming WSGI implementations.
    # TODO enable this once we fix internal violations.
    #wsgiref.validate.check_environ(env)

    # PEP-0333 states that environment keys and values are native strings
    # (bytes on Python 2 and str on Python 3). The code points for the Unicode
    # strings on Python 3 must be between \00000-\000FF. We deal with bytes
    # in Mercurial, so mass convert string keys and values to bytes.
    if pycompat.ispy3:
        env = {k.encode('latin-1'): v for k, v in env.iteritems()}
        env = {k: v.encode('latin-1') if isinstance(v, str) else v
               for k, v in env.iteritems()}

    if altbaseurl:
        altbaseurl = util.url(altbaseurl)

    # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
    # the environment variables.
    # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
    # how URLs are reconstructed.
    fullurl = env['wsgi.url_scheme'] + '://'

    if altbaseurl and altbaseurl.scheme:
        advertisedfullurl = altbaseurl.scheme + '://'
    else:
        advertisedfullurl = fullurl

    def addport(s, port):
        # Append ":<port>" unless it is the default port for the scheme that
        # ``s`` starts with.
        if s.startswith('https://'):
            if port != '443':
                s += ':' + port
        else:
            if port != '80':
                s += ':' + port

        return s

    if env.get('HTTP_HOST'):
        fullurl += env['HTTP_HOST']
    else:
        fullurl += env['SERVER_NAME']
        fullurl = addport(fullurl, env['SERVER_PORT'])

    if altbaseurl and altbaseurl.host:
        advertisedfullurl += altbaseurl.host

        # Prefer an explicit port, then the scheme's default, then whatever
        # port the server reports.
        if altbaseurl.port:
            port = altbaseurl.port
        elif altbaseurl.scheme == 'http':
            port = '80'
        elif altbaseurl.scheme == 'https':
            port = '443'
        else:
            port = env['SERVER_PORT']

        advertisedfullurl = addport(advertisedfullurl, port)
    else:
        advertisedfullurl += env['SERVER_NAME']
        advertisedfullurl = addport(advertisedfullurl, env['SERVER_PORT'])

    baseurl = fullurl
    advertisedbaseurl = advertisedfullurl

    # The application path comes from the alternate base URL when one is
    # given, otherwise from SCRIPT_NAME. It is used both in the advertised
    # URL and as the starting point for ``apppath`` below.
    if altbaseurl:
        apppath = altbaseurl.path or ''
        if apppath and not apppath.startswith('/'):
            apppath = '/' + apppath
    else:
        apppath = env.get('SCRIPT_NAME', '')

    fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
    fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))

    advertisedfullurl += util.urlreq.quote(apppath)
    advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))

    if env.get('QUERY_STRING'):
        fullurl += '?' + env['QUERY_STRING']
        advertisedfullurl += '?' + env['QUERY_STRING']

    # If ``reponame`` is defined, that must be a prefix on PATH_INFO
    # that represents the repository being dispatched to. When computing
    # the dispatch info, we ignore these leading path components.

    if reponame:
        repoprefix = '/' + reponame.strip('/')

        if not env.get('PATH_INFO'):
            raise error.ProgrammingError('reponame requires PATH_INFO')

        if not env['PATH_INFO'].startswith(repoprefix):
            raise error.ProgrammingError('PATH_INFO does not begin with repo '
                                         'name: %s (%s)' % (env['PATH_INFO'],
                                                            reponame))

        dispatchpath = env['PATH_INFO'][len(repoprefix):]

        if dispatchpath and not dispatchpath.startswith('/'):
            raise error.ProgrammingError('reponame prefix of PATH_INFO does '
                                         'not end at path delimiter: %s (%s)' %
                                         (env['PATH_INFO'], reponame))

        apppath = apppath.rstrip('/') + repoprefix
        dispatchparts = dispatchpath.strip('/').split('/')
        dispatchpath = '/'.join(dispatchparts)

    elif 'PATH_INFO' in env:
        if env['PATH_INFO'].strip('/'):
            dispatchparts = env['PATH_INFO'].strip('/').split('/')
            dispatchpath = '/'.join(dispatchparts)
        else:
            dispatchparts = []
            dispatchpath = ''
    else:
        dispatchparts = []
        dispatchpath = None

    querystring = env.get('QUERY_STRING', '')

    # We store as a list so we have ordering information. We also store as
    # a dict to facilitate fast lookup.
    qsparams = multidict()
    for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
        qsparams.add(k, v)

    # HTTP_* keys contain HTTP request headers. The Headers structure should
    # perform case normalization for us. We just rewrite underscore to dash
    # so keys match what likely went over the wire.
    headers = []
    for k, v in env.iteritems():
        if k.startswith('HTTP_'):
            headers.append((k[len('HTTP_'):].replace('_', '-'), v))

    headers = wsgiheaders.Headers(headers)

    # This is kind of a lie because the HTTP header wasn't explicitly
    # sent. But for all intents and purposes it should be OK to lie about
    # this, since a consumer will use either value to determine how many
    # bytes are available to read.
    if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
        headers['Content-Length'] = env['CONTENT_LENGTH']

    # TODO do this once we remove wsgirequest.inp, otherwise we could have
    # multiple readers from the underlying input stream.
    #bodyfh = env['wsgi.input']
    #if 'Content-Length' in headers:
    #    bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))

    return parsedrequest(method=env['REQUEST_METHOD'],
                         url=fullurl, baseurl=baseurl,
                         advertisedurl=advertisedfullurl,
                         advertisedbaseurl=advertisedbaseurl,
                         urlscheme=env['wsgi.url_scheme'],
                         remoteuser=env.get('REMOTE_USER'),
                         remotehost=env.get('REMOTE_HOST'),
                         apppath=apppath,
                         dispatchparts=dispatchparts, dispatchpath=dispatchpath,
                         reponame=reponame,
                         querystring=querystring,
                         qsparams=qsparams,
                         headers=headers,
                         bodyfh=bodyfh)
309 353
class offsettrackingwriter(object):
    """Append-only, file-like wrapper around a ``write(data)`` callable.

    Every ``write()`` is forwarded to the bound callable, and the amount
    of data written is accumulated so that ``tell()`` can be answered.

    This exists to wrap the ``write()`` function handed back by a WSGI
    ``start_response()`` call: that function is a bare callable, not a
    file object, so it has none of the other file object methods.
    """
    def __init__(self, writefn):
        self._offset = 0
        self._write = writefn

    def write(self, s):
        """Forward ``s`` to the bound callable and advance the offset."""
        written = self._write(s)
        # A callable that doesn't report a byte count returns None; in that
        # case assume the whole buffer was written.
        self._offset += len(s) if written is None else written

    def flush(self):
        """No-op; present only to satisfy the file object protocol."""
        pass

    def tell(self):
        """Return the running total of data written so far."""
        return self._offset
340 384
class wsgiresponse(object):
    """Represents a response to a WSGI request.

    A response consists of a status line, headers, and a body.

    Consumers must populate the ``status`` and ``headers`` fields and
    make a call to a ``setbody*()`` method before the response can be
    issued.

    When it is time to start sending the response over the wire,
    ``sendresponse()`` is called. It handles emitting the header portion
    of the response message. It then yields chunks of body data to be
    written to the peer. Typically, the WSGI application itself calls
    and returns the value from ``sendresponse()``.
    """

    def __init__(self, req, startresponse):
        """Create an empty response tied to a specific request.

        ``req`` is a ``parsedrequest``. ``startresponse`` is the
        ``start_response`` function passed to the WSGI application.
        """
        self._req = req
        self._startresponse = startresponse

        self.status = None
        self.headers = wsgiheaders.Headers([])

        # Exactly one of the following three is populated by a setbody*()
        # call; _verifybody() enforces the "only once" rule.
        self._bodybytes = None
        self._bodygen = None
        self._bodywillwrite = False
        self._started = False
        # Set by sendresponse() when the body is emitted via write().
        self._bodywritefn = None

    def _verifybody(self):
        """Raise ``ProgrammingError`` if a body has already been defined."""
        if (self._bodybytes is not None or self._bodygen is not None
            or self._bodywillwrite):
            raise error.ProgrammingError('cannot define body multiple times')

    def setbodybytes(self, b):
        """Define the response body as static bytes.

        The empty string signals that there is no response body.

        Also sets the ``Content-Length`` header to ``len(b)``.
        """
        self._verifybody()
        self._bodybytes = b
        self.headers['Content-Length'] = '%d' % len(b)

    def setbodygen(self, gen):
        """Define the response body as a generator of bytes."""
        self._verifybody()
        self._bodygen = gen

    def setbodywillwrite(self):
        """Signal an intent to use write() to emit the response body.

        **This is the least preferred way to send a body.**

        It is preferred for WSGI applications to emit a generator of chunks
        constituting the response body. However, some consumers can't emit
        data this way. So, WSGI provides a way to obtain a ``write(data)``
        function that can be used to synchronously perform an unbuffered
        write.

        Calling this function signals an intent to produce the body in this
        manner.
        """
        self._verifybody()
        self._bodywillwrite = True

    def sendresponse(self):
        """Send the generated response to the client.

        Before this is called, ``status`` must be set and one of
        ``setbodybytes()``, ``setbodygen()`` or ``setbodywillwrite()`` must
        be called.

        This is a generator: nothing happens until it is advanced. It
        yields the chunks of the response body (nothing at all for an empty
        bytes body or for a ``setbodywillwrite()`` body).

        Calling this method multiple times is not allowed.

        Raises ``ProgrammingError`` if the response is incomplete, was
        already sent, or is a 304 that carries a body or disallowed headers.
        """
        if self._started:
            raise error.ProgrammingError('sendresponse() called multiple times')

        self._started = True

        if not self.status:
            raise error.ProgrammingError('status line not defined')

        if (self._bodybytes is None and self._bodygen is None
            and not self._bodywillwrite):
            raise error.ProgrammingError('response body not defined')

        # RFC 7232 Section 4.1 states that a 304 MUST generate one of
        # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
        # and SHOULD NOT generate other headers unless they could be used
        # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
        # states that no response body can be issued. Content-Length can
        # be sent. But if it is present, it should be the size of the response
        # that wasn't transferred.
        if self.status.startswith('304 '):
            # setbodybytes('') will set C-L to 0. This doesn't conform with the
            # spec. So remove it.
            if self.headers.get('Content-Length') == '0':
                del self.headers['Content-Length']

            # Strictly speaking, this is too strict. But until it causes
            # problems, let's be strict.
            badheaders = {k for k in self.headers.keys()
                          if k.lower() not in ('date', 'etag', 'expires',
                                               'cache-control',
                                               'content-location',
                                               'vary')}
            if badheaders:
                raise error.ProgrammingError(
                    'illegal header on 304 response: %s' %
                    ', '.join(sorted(badheaders)))

            if self._bodygen is not None or self._bodywillwrite:
                raise error.ProgrammingError("must use setbodybytes('') with "
                                             "304 responses")

        # Various HTTP clients (notably httplib) won't read the HTTP response
        # until the HTTP request has been sent in full. If servers (us) send a
        # response before the HTTP request has been fully sent, the connection
        # may deadlock because neither end is reading.
        #
        # We work around this by "draining" the request data before
        # sending any response in some conditions.
        drain = False
        close = False

        # If the client sent Expect: 100-continue, we assume it is smart enough
        # to deal with the server sending a response before reading the request.
        # (httplib doesn't do this.)
        if self._req.headers.get('Expect', '').lower() == '100-continue':
            pass
        # Only tend to request methods that have bodies. Strictly speaking,
        # we should sniff for a body. But this is fine for our existing
        # WSGI applications.
        elif self._req.method not in ('POST', 'PUT'):
            pass
        else:
            # If we don't know how much data to read, there's no guarantee
            # that we can drain the request responsibly. The WSGI
            # specification only says that servers *should* ensure the
            # input stream doesn't overrun the actual request. So there's
            # no guarantee that reading until EOF won't corrupt the stream
            # state.
            if not isinstance(self._req.bodyfh, util.cappedreader):
                close = True
            else:
                # We /could/ only drain certain HTTP response codes. But 200 and
                # non-200 wire protocol responses both require draining. Since
                # we have a capped reader in place for all situations where we
                # drain, it is safe to read from that stream. We'll either do
                # a drain or no-op if we're already at EOF.
                drain = True

        if close:
            self.headers['Connection'] = 'Close'

        if drain:
            assert isinstance(self._req.bodyfh, util.cappedreader)
            while True:
                chunk = self._req.bodyfh.read(32768)
                if not chunk:
                    break

        write = self._startresponse(pycompat.sysstr(self.status),
                                    self.headers.items())

        # Dispatch on which body kind was *defined* (``is not None``), not on
        # truthiness: an empty bytes body or an empty iterable is a valid
        # body and must not fall through to the error branch.
        if self._bodybytes is not None:
            # The empty string means "no body", so there is nothing to yield.
            if self._bodybytes:
                yield self._bodybytes
        elif self._bodygen is not None:
            for chunk in self._bodygen:
                yield chunk
        elif self._bodywillwrite:
            self._bodywritefn = write
        else:
            # Unreachable given the validation above; kept as a guard.
            raise error.ProgrammingError('do not know how to send body')

    def getbodyfile(self):
        """Obtain a file object like object representing the response body.

        For this to work, you must call ``setbodywillwrite()`` and then
        ``sendresponse()`` first. ``sendresponse()`` is a generator and the
        function won't run to completion unless the generator is advanced. The
        generator yields no items. The easiest way to consume it is with
        ``list(res.sendresponse())``, which should resolve to an empty list -
        ``[]``.
        """
        if not self._bodywillwrite:
            raise error.ProgrammingError('must call setbodywillwrite() first')

        if not self._started:
            raise error.ProgrammingError('must call sendresponse() first; did '
                                         'you remember to consume it since it '
                                         'is a generator?')

        assert self._bodywritefn
        return offsettrackingwriter(self._bodywritefn)
540 584
class wsgirequest(object):
    """Higher-level API for a WSGI request.

    WSGI applications are invoked with 2 arguments. They are used to
    instantiate instances of this class, which provides higher-level APIs
    for obtaining request parameters, writing HTTP output, etc.
    """
    def __init__(self, wsgienv, start_response, altbaseurl=None):
        """Wrap a WSGI environment and its ``start_response`` callable.

        ``wsgienv`` is the WSGI environment dict. ``start_response`` is the
        callable passed to the WSGI application. ``altbaseurl`` is forwarded
        unchanged to ``parserequestfromenv()``.

        Raises ``RuntimeError`` if the WSGI version is not 1.x.
        """
        version = wsgienv[r'wsgi.version']
        if (version < (1, 0)) or (version >= (2, 0)):
            raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
                               % version)

        inp = wsgienv[r'wsgi.input']

        # When the request length is known, cap reads from the input stream
        # at that length. respond() relies on this to drain safely.
        if r'HTTP_CONTENT_LENGTH' in wsgienv:
            inp = util.cappedreader(inp, int(wsgienv[r'HTTP_CONTENT_LENGTH']))
        elif r'CONTENT_LENGTH' in wsgienv:
            inp = util.cappedreader(inp, int(wsgienv[r'CONTENT_LENGTH']))

        self.err = wsgienv[r'wsgi.errors']
        self.threaded = wsgienv[r'wsgi.multithread']
        self.multiprocess = wsgienv[r'wsgi.multiprocess']
        self.run_once = wsgienv[r'wsgi.run_once']
        self.env = wsgienv
        self.req = parserequestfromenv(wsgienv, inp, altbaseurl=altbaseurl)
        self.res = wsgiresponse(self.req, start_response)
        # Reset to None once the response headers have been sent.
        self._start_response = start_response
        self.server_write = None
        self.headers = []

    def respond(self, status, type, filename=None, body=None):
        """Send the response headers (first call only) and an optional body.

        ``status`` may be an ``ErrorResponse``, an integer status code or an
        already formatted status line. ``type`` is the ``Content-Type``
        value. ``filename``, if given, is advertised through a
        ``Content-Disposition`` header. ``body``, if not None, is written
        after the headers and sets ``Content-Length``.

        Raises ``TypeError`` if a queued header value is not a ``str``.
        """
        if not isinstance(type, str):
            type = pycompat.sysstr(type)
        if self._start_response is not None:
            self.headers.append((r'Content-Type', type))
            if filename:
                # Keep only the final path component and escape backslashes
                # and double quotes for use in a quoted header value.
                filename = (filename.rpartition('/')[-1]
                            .replace('\\', '\\\\').replace('"', '\\"'))
                self.headers.append(('Content-Disposition',
                                     'inline; filename="%s"' % filename))
            if body is not None:
                self.headers.append((r'Content-Length', str(len(body))))

            for k, v in self.headers:
                if not isinstance(v, str):
                    raise TypeError('header value must be string: %r' % (v,))

            if isinstance(status, ErrorResponse):
                self.headers.extend(status.headers)
                status = statusmessage(status.code, pycompat.bytestr(status))
            elif status == 200:
                status = '200 Script output follows'
            elif isinstance(status, int):
                status = statusmessage(status)

            # Various HTTP clients (notably httplib) won't read the HTTP
            # response until the HTTP request has been sent in full. If servers
            # (us) send a response before the HTTP request has been fully sent,
            # the connection may deadlock because neither end is reading.
            #
            # We work around this by "draining" the request data before
            # sending any response in some conditions.
            drain = False
            close = False

            # If the client sent Expect: 100-continue, we assume it is smart
            # enough to deal with the server sending a response before reading
            # the request. (httplib doesn't do this.)
            if self.env.get(r'HTTP_EXPECT', r'').lower() == r'100-continue':
                pass
            # Only tend to request methods that have bodies. Strictly speaking,
            # we should sniff for a body. But this is fine for our existing
            # WSGI applications.
            elif self.env[r'REQUEST_METHOD'] not in (r'POST', r'PUT'):
                pass
            else:
                # If we don't know how much data to read, there's no guarantee
                # that we can drain the request responsibly. The WSGI
                # specification only says that servers *should* ensure the
                # input stream doesn't overrun the actual request. So there's
                # no guarantee that reading until EOF won't corrupt the stream
                # state.
                if not isinstance(self.req.bodyfh, util.cappedreader):
                    close = True
                else:
                    # We /could/ only drain certain HTTP response codes. But 200
                    # and non-200 wire protocol responses both require draining.
                    # Since we have a capped reader in place for all situations
                    # where we drain, it is safe to read from that stream. We'll
                    # either do a drain or no-op if we're already at EOF.
                    drain = True

            if close:
                self.headers.append((r'Connection', r'Close'))

            if drain:
                assert isinstance(self.req.bodyfh, util.cappedreader)
                while True:
                    chunk = self.req.bodyfh.read(32768)
                    if not chunk:
                        break

            self.server_write = self._start_response(
                pycompat.sysstr(status), self.headers)
            self._start_response = None
            self.headers = []
        if body is not None:
            self.write(body)
            self.server_write = None

    def write(self, thing):
        """Write ``thing`` to the client; empty values are ignored.

        A connection reset by the peer is silently ignored; any other
        socket error propagates.
        """
        if thing:
            try:
                self.server_write(thing)
            except socket.error as inst:
                # Use the ``errno`` attribute rather than ``inst[0]``:
                # exceptions are not subscriptable on Python 3.
                if inst.errno != errno.ECONNRESET:
                    raise

    def flush(self):
        """No-op; present for file object compatibility."""
        return None
662 706
def wsgiapplication(app_maker):
    '''Compatibility shim for old CGI scripts.

    ``app_maker`` is called once, immediately, to build the application;
    the returned WSGI callable forwards every request to it. A plain
    hgweb() or hgwebdir() can and should now be used as a WSGI application
    instead.'''
    wsgiapp = app_maker()

    def run_wsgi(env, respond):
        return wsgiapp(env, respond)

    return run_wsgi
@@ -1,247 +1,416
1 1 from __future__ import absolute_import, print_function
2 2
3 3 import unittest
4 4
5 5 from mercurial.hgweb import (
6 6 request as requestmod,
7 7 )
8 8 from mercurial import (
9 9 error,
10 10 )
11 11
# Baseline WSGI environment shared by the tests; parse() copies it before
# applying per-test overrides.
DEFAULT_ENV = {
    # CGI-style request variables.
    r'REQUEST_METHOD': r'GET',
    r'SERVER_NAME': r'testserver',
    r'SERVER_PORT': r'80',
    r'SERVER_PROTOCOL': r'http',
    # WSGI-specific variables.
    r'wsgi.version': (1, 0),
    r'wsgi.url_scheme': r'http',
    r'wsgi.input': None,
    r'wsgi.errors': None,
    r'wsgi.multithread': False,
    r'wsgi.multiprocess': True,
    r'wsgi.run_once': False,
}
25 25
def parse(env, bodyfh=None, reponame=None, altbaseurl=None, extra=None):
    """Parse a request from a copy of ``env`` overlaid with ``extra``.

    ``env`` itself is never modified. ``bodyfh``, ``reponame`` and
    ``altbaseurl`` are handed through to ``parserequestfromenv()``.
    """
    merged = dict(env)
    if extra:
        merged.update(extra)

    return requestmod.parserequestfromenv(merged, bodyfh,
                                          reponame=reponame,
                                          altbaseurl=altbaseurl)
31 32
class ParseRequestTests(unittest.TestCase):
    """Checks of ``parserequestfromenv()`` on synthetic WSGI environments."""

    def testdefault(self):
        """The baseline environment parses to root URLs and no path state."""
        r = parse(DEFAULT_ENV)
        self.assertEqual(r.url, b'http://testserver')
        self.assertEqual(r.baseurl, b'http://testserver')
        self.assertEqual(r.advertisedurl, b'http://testserver')
        self.assertEqual(r.advertisedbaseurl, b'http://testserver')
        self.assertEqual(r.urlscheme, b'http')
        self.assertEqual(r.method, b'GET')
        self.assertIsNone(r.remoteuser)
        self.assertIsNone(r.remotehost)
        self.assertEqual(r.apppath, b'')
        self.assertEqual(r.dispatchparts, [])
        self.assertIsNone(r.dispatchpath)
        self.assertIsNone(r.reponame)
        self.assertEqual(r.querystring, b'')
        self.assertEqual(len(r.qsparams), 0)
        self.assertEqual(len(r.headers), 0)

    def testcustomport(self):
        """Non-default ports appear in every URL."""
        r = parse(DEFAULT_ENV, extra={
            r'SERVER_PORT': r'8000',
        })

        self.assertEqual(r.url, b'http://testserver:8000')
        self.assertEqual(r.baseurl, b'http://testserver:8000')
        self.assertEqual(r.advertisedurl, b'http://testserver:8000')
        self.assertEqual(r.advertisedbaseurl, b'http://testserver:8000')

        r = parse(DEFAULT_ENV, extra={
            r'SERVER_PORT': r'4000',
            r'wsgi.url_scheme': r'https',
        })

        self.assertEqual(r.url, b'https://testserver:4000')
        self.assertEqual(r.baseurl, b'https://testserver:4000')
        self.assertEqual(r.advertisedurl, b'https://testserver:4000')
        self.assertEqual(r.advertisedbaseurl, b'https://testserver:4000')

    def testhttphost(self):
        """HTTP_HOST changes the actual URLs but not the advertised ones."""
        r = parse(DEFAULT_ENV, extra={
            r'HTTP_HOST': r'altserver',
        })

        self.assertEqual(r.url, b'http://altserver')
        self.assertEqual(r.baseurl, b'http://altserver')
        self.assertEqual(r.advertisedurl, b'http://testserver')
        self.assertEqual(r.advertisedbaseurl, b'http://testserver')

    def testscriptname(self):
        """SCRIPT_NAME becomes the application path."""
        r = parse(DEFAULT_ENV, extra={
            r'SCRIPT_NAME': r'',
        })

        self.assertEqual(r.url, b'http://testserver')
        self.assertEqual(r.baseurl, b'http://testserver')
        self.assertEqual(r.advertisedurl, b'http://testserver')
        self.assertEqual(r.advertisedbaseurl, b'http://testserver')
        self.assertEqual(r.apppath, b'')
        self.assertEqual(r.dispatchparts, [])
        self.assertIsNone(r.dispatchpath)

        r = parse(DEFAULT_ENV, extra={
            r'SCRIPT_NAME': r'/script',
        })

        self.assertEqual(r.url, b'http://testserver/script')
        self.assertEqual(r.baseurl, b'http://testserver')
        self.assertEqual(r.advertisedurl, b'http://testserver/script')
        self.assertEqual(r.advertisedbaseurl, b'http://testserver')
        self.assertEqual(r.apppath, b'/script')
        self.assertEqual(r.dispatchparts, [])
        self.assertIsNone(r.dispatchpath)

        r = parse(DEFAULT_ENV, extra={
            r'SCRIPT_NAME': r'/multiple words',
        })

        self.assertEqual(r.url, b'http://testserver/multiple%20words')
        self.assertEqual(r.baseurl, b'http://testserver')
        self.assertEqual(r.advertisedurl, b'http://testserver/multiple%20words')
        self.assertEqual(r.advertisedbaseurl, b'http://testserver')
        self.assertEqual(r.apppath, b'/multiple words')
        self.assertEqual(r.dispatchparts, [])
        self.assertIsNone(r.dispatchpath)

    def testpathinfo(self):
        """PATH_INFO becomes the dispatch path."""
        r = parse(DEFAULT_ENV, extra={
            r'PATH_INFO': r'',
        })

        self.assertEqual(r.url, b'http://testserver')
        self.assertEqual(r.baseurl, b'http://testserver')
        self.assertEqual(r.advertisedurl, b'http://testserver')
        self.assertEqual(r.advertisedbaseurl, b'http://testserver')
        self.assertEqual(r.apppath, b'')
        self.assertEqual(r.dispatchparts, [])
        self.assertEqual(r.dispatchpath, b'')

        r = parse(DEFAULT_ENV, extra={
            r'PATH_INFO': r'/pathinfo',
        })

        self.assertEqual(r.url, b'http://testserver/pathinfo')
        self.assertEqual(r.baseurl, b'http://testserver')
        self.assertEqual(r.advertisedurl, b'http://testserver/pathinfo')
        self.assertEqual(r.advertisedbaseurl, b'http://testserver')
        self.assertEqual(r.apppath, b'')
        self.assertEqual(r.dispatchparts, [b'pathinfo'])
        self.assertEqual(r.dispatchpath, b'pathinfo')

        r = parse(DEFAULT_ENV, extra={
            r'PATH_INFO': r'/one/two/',
        })

        self.assertEqual(r.url, b'http://testserver/one/two/')
        self.assertEqual(r.baseurl, b'http://testserver')
        self.assertEqual(r.advertisedurl, b'http://testserver/one/two/')
        self.assertEqual(r.advertisedbaseurl, b'http://testserver')
        self.assertEqual(r.apppath, b'')
        self.assertEqual(r.dispatchparts, [b'one', b'two'])
        self.assertEqual(r.dispatchpath, b'one/two')

    def testscriptandpathinfo(self):
        """SCRIPT_NAME and PATH_INFO combine into the full URL."""
        r = parse(DEFAULT_ENV, extra={
            r'SCRIPT_NAME': r'/script',
            r'PATH_INFO': r'/pathinfo',
        })

        self.assertEqual(r.url, b'http://testserver/script/pathinfo')
        self.assertEqual(r.baseurl, b'http://testserver')
        self.assertEqual(r.advertisedurl, b'http://testserver/script/pathinfo')
        self.assertEqual(r.advertisedbaseurl, b'http://testserver')
        self.assertEqual(r.apppath, b'/script')
        self.assertEqual(r.dispatchparts, [b'pathinfo'])
        self.assertEqual(r.dispatchpath, b'pathinfo')

        r = parse(DEFAULT_ENV, extra={
            r'SCRIPT_NAME': r'/script1/script2',
            r'PATH_INFO': r'/path1/path2',
        })

        self.assertEqual(r.url,
                         b'http://testserver/script1/script2/path1/path2')
        self.assertEqual(r.baseurl, b'http://testserver')
        self.assertEqual(r.advertisedurl,
                         b'http://testserver/script1/script2/path1/path2')
        self.assertEqual(r.advertisedbaseurl, b'http://testserver')
        self.assertEqual(r.apppath, b'/script1/script2')
        self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
        self.assertEqual(r.dispatchpath, b'path1/path2')

        r = parse(DEFAULT_ENV, extra={
            r'HTTP_HOST': r'hostserver',
            r'SCRIPT_NAME': r'/script',
            r'PATH_INFO': r'/pathinfo',
        })

        self.assertEqual(r.url, b'http://hostserver/script/pathinfo')
        self.assertEqual(r.baseurl, b'http://hostserver')
        self.assertEqual(r.advertisedurl, b'http://testserver/script/pathinfo')
        self.assertEqual(r.advertisedbaseurl, b'http://testserver')
        self.assertEqual(r.apppath, b'/script')
        self.assertEqual(r.dispatchparts, [b'pathinfo'])
        self.assertEqual(r.dispatchpath, b'pathinfo')

    def testreponame(self):
        """repository path components get stripped from URL."""

        with self.assertRaisesRegexp(error.ProgrammingError,
                                     b'reponame requires PATH_INFO'):
            parse(DEFAULT_ENV, reponame=b'repo')

        with self.assertRaisesRegexp(error.ProgrammingError,
                                     b'PATH_INFO does not begin with repo '
                                     b'name'):
            parse(DEFAULT_ENV, reponame=b'repo', extra={
                r'PATH_INFO': r'/pathinfo',
            })

        with self.assertRaisesRegexp(error.ProgrammingError,
                                     b'reponame prefix of PATH_INFO'):
            parse(DEFAULT_ENV, reponame=b'repo', extra={
                r'PATH_INFO': r'/repoextra/path',
            })

        r = parse(DEFAULT_ENV, reponame=b'repo', extra={
            r'PATH_INFO': r'/repo/path1/path2',
        })

        self.assertEqual(r.url, b'http://testserver/repo/path1/path2')
        self.assertEqual(r.baseurl, b'http://testserver')
        self.assertEqual(r.advertisedurl, b'http://testserver/repo/path1/path2')
        self.assertEqual(r.advertisedbaseurl, b'http://testserver')
        self.assertEqual(r.apppath, b'/repo')
        self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
        self.assertEqual(r.dispatchpath, b'path1/path2')
        self.assertEqual(r.reponame, b'repo')

        r = parse(DEFAULT_ENV, reponame=b'prefix/repo', extra={
            r'PATH_INFO': r'/prefix/repo/path1/path2',
        })

        self.assertEqual(r.url, b'http://testserver/prefix/repo/path1/path2')
        self.assertEqual(r.baseurl, b'http://testserver')
        self.assertEqual(r.advertisedurl,
                         b'http://testserver/prefix/repo/path1/path2')
        self.assertEqual(r.advertisedbaseurl, b'http://testserver')
        self.assertEqual(r.apppath, b'/prefix/repo')
        self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
        self.assertEqual(r.dispatchpath, b'path1/path2')
        self.assertEqual(r.reponame, b'prefix/repo')

    def testaltbaseurl(self):
        """An alternate base URL drives the advertised URLs and app path.

        The alternate base URL is passed as bytes, like ``reponame`` and
        all of the expected values.
        """
        # Simple hostname remap.
        r = parse(DEFAULT_ENV, altbaseurl=b'http://altserver')

        self.assertEqual(r.url, b'http://testserver')
        self.assertEqual(r.baseurl, b'http://testserver')
        self.assertEqual(r.advertisedurl, b'http://altserver')
        self.assertEqual(r.advertisedbaseurl, b'http://altserver')
        self.assertEqual(r.urlscheme, b'http')
        self.assertEqual(r.apppath, b'')
        self.assertEqual(r.dispatchparts, [])
        self.assertIsNone(r.dispatchpath)
        self.assertIsNone(r.reponame)

        # With a custom port.
        r = parse(DEFAULT_ENV, altbaseurl=b'http://altserver:8000')
        self.assertEqual(r.url, b'http://testserver')
        self.assertEqual(r.baseurl, b'http://testserver')
        self.assertEqual(r.advertisedurl, b'http://altserver:8000')
        self.assertEqual(r.advertisedbaseurl, b'http://altserver:8000')
        self.assertEqual(r.urlscheme, b'http')
        self.assertEqual(r.apppath, b'')
        self.assertEqual(r.dispatchparts, [])
        self.assertIsNone(r.dispatchpath)
        self.assertIsNone(r.reponame)

        # With a changed protocol.
        r = parse(DEFAULT_ENV, altbaseurl=b'https://altserver')
        self.assertEqual(r.url, b'http://testserver')
        self.assertEqual(r.baseurl, b'http://testserver')
        self.assertEqual(r.advertisedurl, b'https://altserver')
        self.assertEqual(r.advertisedbaseurl, b'https://altserver')
        # URL scheme is defined as the actual scheme, not advertised.
        self.assertEqual(r.urlscheme, b'http')
        self.assertEqual(r.apppath, b'')
        self.assertEqual(r.dispatchparts, [])
        self.assertIsNone(r.dispatchpath)
        self.assertIsNone(r.reponame)

        # Need to specify explicit port number for proper https:// alt URLs.
        r = parse(DEFAULT_ENV, altbaseurl=b'https://altserver:443')
        self.assertEqual(r.url, b'http://testserver')
        self.assertEqual(r.baseurl, b'http://testserver')
        self.assertEqual(r.advertisedurl, b'https://altserver')
        self.assertEqual(r.advertisedbaseurl, b'https://altserver')
        self.assertEqual(r.urlscheme, b'http')
        self.assertEqual(r.apppath, b'')
        self.assertEqual(r.dispatchparts, [])
        self.assertIsNone(r.dispatchpath)
        self.assertIsNone(r.reponame)

        # With only PATH_INFO defined.
        r = parse(DEFAULT_ENV, altbaseurl=b'http://altserver', extra={
            r'PATH_INFO': r'/path1/path2',
        })
        self.assertEqual(r.url, b'http://testserver/path1/path2')
        self.assertEqual(r.baseurl, b'http://testserver')
        self.assertEqual(r.advertisedurl, b'http://altserver/path1/path2')
        self.assertEqual(r.advertisedbaseurl, b'http://altserver')
        self.assertEqual(r.urlscheme, b'http')
        self.assertEqual(r.apppath, b'')
        self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
        self.assertEqual(r.dispatchpath, b'path1/path2')
        self.assertIsNone(r.reponame)

        # Path on alt URL.
        r = parse(DEFAULT_ENV, altbaseurl=b'http://altserver/altpath')
        self.assertEqual(r.url, b'http://testserver')
        self.assertEqual(r.baseurl, b'http://testserver')
        self.assertEqual(r.advertisedurl, b'http://altserver/altpath')
        self.assertEqual(r.advertisedbaseurl, b'http://altserver')
        self.assertEqual(r.urlscheme, b'http')
        self.assertEqual(r.apppath, b'/altpath')
        self.assertEqual(r.dispatchparts, [])
        self.assertIsNone(r.dispatchpath)
        self.assertIsNone(r.reponame)

        # With a trailing slash.
        r = parse(DEFAULT_ENV, altbaseurl=b'http://altserver/altpath/')
        self.assertEqual(r.url, b'http://testserver')
        self.assertEqual(r.baseurl, b'http://testserver')
        self.assertEqual(r.advertisedurl, b'http://altserver/altpath/')
        self.assertEqual(r.advertisedbaseurl, b'http://altserver')
        self.assertEqual(r.urlscheme, b'http')
        self.assertEqual(r.apppath, b'/altpath/')
        self.assertEqual(r.dispatchparts, [])
        self.assertIsNone(r.dispatchpath)
        self.assertIsNone(r.reponame)

        # PATH_INFO + path on alt URL.
        r = parse(DEFAULT_ENV, altbaseurl=b'http://altserver/altpath', extra={
            r'PATH_INFO': r'/path1/path2',
        })
        self.assertEqual(r.url, b'http://testserver/path1/path2')
        self.assertEqual(r.baseurl, b'http://testserver')
        self.assertEqual(r.advertisedurl,
                         b'http://altserver/altpath/path1/path2')
        self.assertEqual(r.advertisedbaseurl, b'http://altserver')
        self.assertEqual(r.urlscheme, b'http')
        self.assertEqual(r.apppath, b'/altpath')
        self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
        self.assertEqual(r.dispatchpath, b'path1/path2')
        self.assertIsNone(r.reponame)

        # PATH_INFO + path on alt URL with trailing slash.
        r = parse(DEFAULT_ENV, altbaseurl=b'http://altserver/altpath/', extra={
            r'PATH_INFO': r'/path1/path2',
        })
        self.assertEqual(r.url, b'http://testserver/path1/path2')
        self.assertEqual(r.baseurl, b'http://testserver')
        self.assertEqual(r.advertisedurl,
                         b'http://altserver/altpath//path1/path2')
        self.assertEqual(r.advertisedbaseurl, b'http://altserver')
        self.assertEqual(r.urlscheme, b'http')
        self.assertEqual(r.apppath, b'/altpath/')
        self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
        self.assertEqual(r.dispatchpath, b'path1/path2')
        self.assertIsNone(r.reponame)

        # Local SCRIPT_NAME is ignored.
        r = parse(DEFAULT_ENV, altbaseurl=b'http://altserver', extra={
            r'SCRIPT_NAME': r'/script',
            r'PATH_INFO': r'/path1/path2',
        })
        self.assertEqual(r.url, b'http://testserver/script/path1/path2')
        self.assertEqual(r.baseurl, b'http://testserver')
        self.assertEqual(r.advertisedurl, b'http://altserver/path1/path2')
        self.assertEqual(r.advertisedbaseurl, b'http://altserver')
        self.assertEqual(r.urlscheme, b'http')
        self.assertEqual(r.apppath, b'')
        self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
        self.assertEqual(r.dispatchpath, b'path1/path2')
        self.assertIsNone(r.reponame)

        # Use remote's path for script name, app path
        r = parse(DEFAULT_ENV, altbaseurl=b'http://altserver/altroot', extra={
            r'SCRIPT_NAME': r'/script',
            r'PATH_INFO': r'/path1/path2',
        })
        self.assertEqual(r.url, b'http://testserver/script/path1/path2')
        self.assertEqual(r.baseurl, b'http://testserver')
        self.assertEqual(r.advertisedurl,
                         b'http://altserver/altroot/path1/path2')
        self.assertEqual(r.advertisedbaseurl, b'http://altserver')
        self.assertEqual(r.urlscheme, b'http')
        self.assertEqual(r.apppath, b'/altroot')
        self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
        self.assertEqual(r.dispatchpath, b'path1/path2')
        self.assertIsNone(r.reponame)

        # reponame is factored in properly.
        r = parse(DEFAULT_ENV, reponame=b'repo',
                  altbaseurl=b'http://altserver/altroot',
                  extra={
                      r'SCRIPT_NAME': r'/script',
                      r'PATH_INFO': r'/repo/path1/path2',
                  })

        self.assertEqual(r.url, b'http://testserver/script/repo/path1/path2')
        self.assertEqual(r.baseurl, b'http://testserver')
        self.assertEqual(r.advertisedurl,
                         b'http://altserver/altroot/repo/path1/path2')
        self.assertEqual(r.advertisedbaseurl, b'http://altserver')
        self.assertEqual(r.apppath, b'/altroot/repo')
        self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
        self.assertEqual(r.dispatchpath, b'path1/path2')
        self.assertEqual(r.reponame, b'repo')
413
if __name__ == '__main__':
    # Imported here so that merely importing this module does not require
    # the test runner to be available.
    import silenttestrunner
    silenttestrunner.main(__name__)
General Comments 0
You need to be logged in to leave comments. Login now