##// END OF EJS Templates
hgweb: refactor repository name URL parsing...
Gregory Szorc -
r36913:d7fd203e default
parent child Browse files
Show More
@@ -1,569 +1,566 b''
1 1 # hgweb/hgwebdir_mod.py - Web interface for a directory of repositories.
2 2 #
3 3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import os
12 12 import re
13 13 import time
14 14
15 15 from ..i18n import _
16 16
17 17 from .common import (
18 18 ErrorResponse,
19 19 HTTP_NOT_FOUND,
20 20 HTTP_OK,
21 21 HTTP_SERVER_ERROR,
22 22 cspvalues,
23 23 get_contact,
24 24 get_mtime,
25 25 ismember,
26 26 paritygen,
27 27 staticfile,
28 28 )
29 29
30 30 from .. import (
31 31 configitems,
32 32 encoding,
33 33 error,
34 34 hg,
35 35 profiling,
36 36 pycompat,
37 37 scmutil,
38 38 templater,
39 39 ui as uimod,
40 40 util,
41 41 )
42 42
43 43 from . import (
44 44 hgweb_mod,
45 45 request as requestmod,
46 46 webutil,
47 47 wsgicgi,
48 48 )
49 49 from ..utils import dateutil
50 50
def cleannames(items):
    """Normalize the name of every ``(name, path)`` pair in ``items``.

    Each name is run through ``util.pconvert`` and stripped of leading and
    trailing slashes. Paths are passed through unchanged. Returns a list.
    """
    cleaned = []
    for name, path in items:
        cleaned.append((util.pconvert(name).strip('/'), path))
    return cleaned
53 53
def findrepos(paths):
    """Expand configured ``(prefix, root)`` pairs into ``(name, path)`` pairs.

    A root whose last component is ``*`` or ``**`` is treated as a wildcard
    and expanded by walking the filesystem; any other root is returned as
    is, paired with its cleaned prefix.
    """
    # "foo = /bar/*" or "foo = /bar/**" lets every repo /bar/N in or below
    # /bar/ be served as foo/N .
    # '*' will not search inside dirs with .hg (except .hg/patches),
    # '**' will search inside dirs with .hg (and thus also find subrepos).
    wildcards = {'*': False, '**': True}

    repos = []
    for prefix, root in cleannames(paths):
        roothead, roottail = os.path.split(root)
        if roottail not in wildcards:
            # plain entry: serve exactly this path under this prefix
            repos.append((prefix, root))
            continue

        roothead = os.path.normpath(os.path.abspath(roothead))
        found = scmutil.walkrepos(roothead, followsym=True,
                                  recurse=wildcards[roottail])
        repos.extend(urlrepos(prefix, roothead, found))
    return repos
71 71
def urlrepos(prefix, roothead, paths):
    """yield url paths and filesystem paths from a list of repo paths

    >>> conv = lambda seq: [(v, util.pconvert(p)) for v,p in seq]
    >>> conv(urlrepos(b'hg', b'/opt', [b'/opt/r', b'/opt/r/r', b'/opt']))
    [('hg/r', '/opt/r'), ('hg/r/r', '/opt/r/r'), ('hg', '/opt')]
    >>> conv(urlrepos(b'', b'/opt', [b'/opt/r', b'/opt/r/r', b'/opt']))
    [('r', '/opt/r'), ('r/r', '/opt/r/r'), ('', '/opt')]
    """
    for rawpath in paths:
        fspath = os.path.normpath(rawpath)
        # portion of the path below ``roothead``, in URL ('/') form
        relative = util.pconvert(fspath[len(roothead):]).lstrip('/')
        urlpath = (prefix + '/' + relative).strip('/')
        yield urlpath, fspath
85 85
def geturlcgivars(baseurl, port):
    """
    Extract CGI variables from baseurl

    Returns a ``(name, port, path)`` tuple. The port embedded in ``baseurl``
    wins over the ``port`` argument, and the path always starts with '/'.

    >>> geturlcgivars(b"http://host.org/base", b"80")
    ('host.org', '80', '/base')
    >>> geturlcgivars(b"http://host.org:8000/base", b"80")
    ('host.org', '8000', '/base')
    >>> geturlcgivars(b'/base', 8000)
    ('', '8000', '/base')
    >>> geturlcgivars(b"base", b'8000')
    ('', '8000', '/base')
    >>> geturlcgivars(b"http://host", b'8000')
    ('host', '8000', '/')
    >>> geturlcgivars(b"http://host/", b'8000')
    ('host', '8000', '/')
    """
    parsed = util.url(baseurl)

    urlport = parsed.port
    if urlport:
        port = urlport

    path = parsed.path or ""
    if not path.startswith('/'):
        path = '/' + path

    host = parsed.host or ''
    return host, pycompat.bytestr(port), path
112 112
def readallowed(ui, req):
    """Decide whether the requesting user may read a repository.

    Consults the ``web.deny_read`` and ``web.allow_read`` lists of the
    repo's ``ui``. With neither set (or both empty) everybody may read.
    Access is refused when (1) deny_read is non-empty and the user is
    unauthenticated or matched by deny_read, or (2) allow_read is non-empty
    and the user is not matched by it. Returns True if reading is allowed,
    else False.
    """
    user = req.remoteuser

    denied = ui.configlist('web', 'deny_read', untrusted=True)
    if denied:
        if not user or ismember(ui, user, denied):
            return False

    allowed = ui.configlist('web', 'allow_read', untrusted=True)
    if allowed and not ismember(ui, user, allowed):
        return False

    # by default, allow reading if no allow_read option has been set
    return True
134 134
def archivelist(ui, nodeid, url):
    """Return the archive download descriptors enabled for a repository.

    An archive type is enabled when it is listed in ``web.allow_archive``
    or when the boolean ``web.allow<type>`` is set. Each descriptor is a
    dict with ``type``, ``extension``, ``node`` and ``url`` keys.
    """
    allowed = ui.configlist('web', 'allow_archive', untrusted=True)

    def enabled(typ):
        return (typ in allowed or
                ui.configbool('web', 'allow' + typ, untrusted=True))

    return [
        {
            'type': typ,
            'extension': spec[2],
            'node': nodeid,
            'url': url,
        }
        for typ, spec in hgweb_mod.archivespecs.iteritems()
        if enabled(typ)
    ]
150 150
151 151 def rawindexentries(ui, repos, wsgireq, req, subdir=''):
152 152 descend = ui.configbool('web', 'descend')
153 153 collapse = ui.configbool('web', 'collapse')
154 154 seenrepos = set()
155 155 seendirs = set()
156 156 for name, path in repos:
157 157
158 158 if not name.startswith(subdir):
159 159 continue
160 160 name = name[len(subdir):]
161 161 directory = False
162 162
163 163 if '/' in name:
164 164 if not descend:
165 165 continue
166 166
167 167 nameparts = name.split('/')
168 168 rootname = nameparts[0]
169 169
170 170 if not collapse:
171 171 pass
172 172 elif rootname in seendirs:
173 173 continue
174 174 elif rootname in seenrepos:
175 175 pass
176 176 else:
177 177 directory = True
178 178 name = rootname
179 179
180 180 # redefine the path to refer to the directory
181 181 discarded = '/'.join(nameparts[1:])
182 182
183 183 # remove name parts plus accompanying slash
184 184 path = path[:-len(discarded) - 1]
185 185
186 186 try:
187 187 r = hg.repository(ui, path)
188 188 directory = False
189 189 except (IOError, error.RepoError):
190 190 pass
191 191
192 192 parts = [name]
193 193 parts.insert(0, '/' + subdir.rstrip('/'))
194 194 if wsgireq.env['SCRIPT_NAME']:
195 195 parts.insert(0, wsgireq.env['SCRIPT_NAME'])
196 196 url = re.sub(r'/+', '/', '/'.join(parts) + '/')
197 197
198 198 # show either a directory entry or a repository
199 199 if directory:
200 200 # get the directory's time information
201 201 try:
202 202 d = (get_mtime(path), dateutil.makedate()[1])
203 203 except OSError:
204 204 continue
205 205
206 206 # add '/' to the name to make it obvious that
207 207 # the entry is a directory, not a regular repository
208 208 row = {'contact': "",
209 209 'contact_sort': "",
210 210 'name': name + '/',
211 211 'name_sort': name,
212 212 'url': url,
213 213 'description': "",
214 214 'description_sort': "",
215 215 'lastchange': d,
216 216 'lastchange_sort': d[1] - d[0],
217 217 'archives': [],
218 218 'isdirectory': True,
219 219 'labels': [],
220 220 }
221 221
222 222 seendirs.add(name)
223 223 yield row
224 224 continue
225 225
226 226 u = ui.copy()
227 227 try:
228 228 u.readconfig(os.path.join(path, '.hg', 'hgrc'))
229 229 except Exception as e:
230 230 u.warn(_('error reading %s/.hg/hgrc: %s\n') % (path, e))
231 231 continue
232 232
233 233 def get(section, name, default=uimod._unset):
234 234 return u.config(section, name, default, untrusted=True)
235 235
236 236 if u.configbool("web", "hidden", untrusted=True):
237 237 continue
238 238
239 239 if not readallowed(u, req):
240 240 continue
241 241
242 242 # update time with local timezone
243 243 try:
244 244 r = hg.repository(ui, path)
245 245 except IOError:
246 246 u.warn(_('error accessing repository at %s\n') % path)
247 247 continue
248 248 except error.RepoError:
249 249 u.warn(_('error accessing repository at %s\n') % path)
250 250 continue
251 251 try:
252 252 d = (get_mtime(r.spath), dateutil.makedate()[1])
253 253 except OSError:
254 254 continue
255 255
256 256 contact = get_contact(get)
257 257 description = get("web", "description")
258 258 seenrepos.add(name)
259 259 name = get("web", "name", name)
260 260 row = {'contact': contact or "unknown",
261 261 'contact_sort': contact.upper() or "unknown",
262 262 'name': name,
263 263 'name_sort': name,
264 264 'url': url,
265 265 'description': description or "unknown",
266 266 'description_sort': description.upper() or "unknown",
267 267 'lastchange': d,
268 268 'lastchange_sort': d[1] - d[0],
269 269 'archives': archivelist(u, "tip", url),
270 270 'isdirectory': None,
271 271 'labels': u.configlist('web', 'labels', untrusted=True),
272 272 }
273 273
274 274 yield row
275 275
def indexentries(ui, repos, wsgireq, req, stripecount, sortcolumn='',
                 descending=False, subdir=''):
    """Yield index rows, optionally sorted, each tagged with a parity.

    Rows come from ``rawindexentries``. When ``sortcolumn`` is set they are
    ordered by the row's ``<sortcolumn>_sort`` value (reversed when
    ``descending``). Every yielded row gains a ``parity`` key taken from
    ``paritygen(stripecount)``.
    """
    rows = rawindexentries(ui, repos, wsgireq, req, subdir=subdir)

    sortdefault = None, False
    requested = (sortcolumn, descending)

    if sortcolumn and sortdefault != requested:
        sortkey = '%s_sort' % sortcolumn

        def bysortkey(row):
            return row[sortkey]

        rows = sorted(rows, key=bysortkey, reverse=descending)

    for row, parity in zip(rows, paritygen(stripecount)):
        row['parity'] = parity
        yield row
291 291
class hgwebdir(object):
    """HTTP server for multiple repositories.

    Given a configuration, different repositories will be served depending
    on the request path.

    Instances are typically used as WSGI applications.
    """
    def __init__(self, conf, baseui=None):
        """Remember the configuration source and load it immediately.

        ``conf`` may be a dict, a list/tuple of ``(name, path)`` pairs, or
        anything else, which ``refresh()`` treats as a config file path.
        """
        self.conf = conf
        self.baseui = baseui
        self.ui = None
        self.lastrefresh = 0
        self.motd = None
        self.refresh()

    def refresh(self):
        """Reload configuration and the repository list if they are stale.

        Does nothing when ``web.refreshinterval`` is positive and has not
        yet elapsed since the previous refresh.
        """
        if self.ui:
            refreshinterval = self.ui.configint('web', 'refreshinterval')
        else:
            # no ui loaded yet (first call): use the registered default
            item = configitems.coreitems['web']['refreshinterval']
            refreshinterval = item.default

        # refreshinterval <= 0 means to always refresh.
        if (refreshinterval > 0 and
            self.lastrefresh + refreshinterval > time.time()):
            return

        if self.baseui:
            u = self.baseui.copy()
        else:
            u = uimod.ui.load()
            u.setconfig('ui', 'report_untrusted', 'off', 'hgwebdir')
            u.setconfig('ui', 'nontty', 'true', 'hgwebdir')
            # displaying bundling progress bar while serving feels wrong and may
            # break some wsgi implementations.
            u.setconfig('progress', 'disable', 'true', 'hgweb')

        if not isinstance(self.conf, (dict, list, tuple)):
            # conf is a config file path; its [paths] section is read as
            # [hgweb-paths]
            map = {'paths': 'hgweb-paths'}
            if not os.path.exists(self.conf):
                raise error.Abort(_('config file %s not found!') % self.conf)
            u.readconfig(self.conf, remap=map, trust=True)
            paths = []
            for name, ignored in u.configitems('hgweb-paths'):
                for path in u.configlist('hgweb-paths', name):
                    paths.append((name, path))
        elif isinstance(self.conf, (list, tuple)):
            paths = self.conf
        elif isinstance(self.conf, dict):
            paths = self.conf.items()

        repos = findrepos(paths)
        # [collections] entries: every repo found below ``root`` is served
        # under its path with ``prefix`` removed
        for prefix, root in u.configitems('collections'):
            prefix = util.pconvert(prefix)
            for path in scmutil.walkrepos(root, followsym=True):
                repo = os.path.normpath(path)
                name = util.pconvert(repo)
                if name.startswith(prefix):
                    name = name[len(prefix):]
                repos.append((name.lstrip('/'), repo))

        self.repos = repos
        self.ui = u
        encoding.encoding = self.ui.config('web', 'encoding')
        self.style = self.ui.config('web', 'style')
        self.templatepath = self.ui.config('web', 'templates', untrusted=False)
        self.stripecount = self.ui.config('web', 'stripes')
        if self.stripecount:
            self.stripecount = int(self.stripecount)
        self._baseurl = self.ui.config('web', 'baseurl')
        # drop at most one leading and one trailing slash from the prefix
        prefix = self.ui.config('web', 'prefix')
        if prefix.startswith('/'):
            prefix = prefix[1:]
        if prefix.endswith('/'):
            prefix = prefix[:-1]
        self.prefix = prefix
        self.lastrefresh = time.time()

    def run(self):
        """Serve a single request as a CGI script.

        Raises RuntimeError when GATEWAY_INTERFACE does not indicate CGI/1.x.
        """
        if not encoding.environ.get('GATEWAY_INTERFACE',
                                    '').startswith("CGI/1."):
            raise RuntimeError("This function is only intended to be "
                               "called while running as a CGI script.")
        wsgicgi.launch(self)

    def __call__(self, env, respond):
        """WSGI entry point: wrap the environ and delegate to run_wsgi()."""
        wsgireq = requestmod.wsgirequest(env, respond)
        return self.run_wsgi(wsgireq)

    def run_wsgi(self, wsgireq):
        """Yield the response chunks, optionally under the profiler."""
        profile = self.ui.configbool('profiling', 'enabled')
        with profiling.profile(self.ui, enabled=profile):
            for r in self._runwsgi(wsgireq):
                yield r

    def _runwsgi(self, wsgireq):
        """Dispatch one request.

        In order: static files, the top-level index, nested ``.../index``
        listings, a matching repository (handed to ``hgweb_mod.hgweb``),
        a subdirectory listing, and finally the "notfound" template.
        """
        req = wsgireq.req
        res = wsgireq.res

        try:
            self.refresh()

            csp, nonce = cspvalues(self.ui)
            if csp:
                res.headers['Content-Security-Policy'] = csp
                wsgireq.headers.append(('Content-Security-Policy', csp))

            virtual = wsgireq.env.get("PATH_INFO", "").strip('/')
            tmpl = self.templater(wsgireq, nonce)
            ctype = tmpl('mimetype', encoding=encoding.encoding)
            ctype = templater.stringify(ctype)

            # Global defaults. These can be overridden by any handler.
            res.status = '200 Script output follows'
            res.headers['Content-Type'] = ctype

            # a static file
            if virtual.startswith('static/') or 'static' in req.qsparams:
                if virtual.startswith('static/'):
                    # 7 == len('static/')
                    fname = virtual[7:]
                else:
                    fname = req.qsparams['static']
                static = self.ui.config("web", "static", None,
                                        untrusted=False)
                if not static:
                    tp = self.templatepath or templater.templatepaths()
                    if isinstance(tp, str):
                        tp = [tp]
                    static = [os.path.join(p, 'static') for p in tp]

                staticfile(static, fname, res)
                return res.sendresponse()

            # top-level index

            repos = dict(self.repos)

            if (not virtual or virtual == 'index') and virtual not in repos:
                wsgireq.respond(HTTP_OK, ctype)
                return self.makeindex(wsgireq, tmpl)

            # nested indexes and hgwebs

            if virtual.endswith('/index') and virtual not in repos:
                # keeps the trailing '/', e.g. 'a/index' -> 'a/'
                subdir = virtual[:-len('index')]
                if any(r.startswith(subdir) for r in repos):
                    wsgireq.respond(HTTP_OK, ctype)
                    return self.makeindex(wsgireq, tmpl, subdir)

            def _virtualdirs():
                # Check the full virtual path, each parent, and the root ('')
                if virtual != '':
                    yield virtual

                    for p in util.finddirs(virtual):
                        yield p

                yield ''

            for virtualrepo in _virtualdirs():
                real = repos.get(virtualrepo)
                if real:
                    # Re-parse the WSGI environment to take into account our
                    # repository path component.
                    wsgireq.req = requestmod.parserequestfromenv(
                        wsgireq.env, wsgireq.req.bodyfh, reponame=virtualrepo)
                    try:
                        # ensure caller gets private copy of ui
                        repo = hg.repository(self.ui.copy(), real)
                        return hgweb_mod.hgweb(repo).run_wsgi(wsgireq)
                    except IOError as inst:
                        msg = encoding.strtolocal(inst.strerror)
                        raise ErrorResponse(HTTP_SERVER_ERROR, msg)
                    except error.RepoError as inst:
                        raise ErrorResponse(HTTP_SERVER_ERROR, bytes(inst))

            # browse subdirectories
            subdir = virtual + '/'
            if [r for r in repos if r.startswith(subdir)]:
                wsgireq.respond(HTTP_OK, ctype)
                return self.makeindex(wsgireq, tmpl, subdir)

            # prefixes not found
            wsgireq.respond(HTTP_NOT_FOUND, ctype)
            return tmpl("notfound", repo=virtual)

        except ErrorResponse as err:
            wsgireq.respond(err, ctype)
            return tmpl('error', error=err.message or '')
        finally:
            tmpl = None

    def makeindex(self, wsgireq, tmpl, subdir=""):
        """Render the "index" template listing repositories under ``subdir``.

        The ``sort`` query parameter selects the sort column; a leading '-'
        requests descending order, and unknown columns are ignored.
        """
        req = wsgireq.req

        self.refresh()
        sortable = ["name", "description", "contact", "lastchange"]
        sortcolumn, descending = None, False
        if 'sort' in req.qsparams:
            sortcolumn = req.qsparams['sort']
            descending = sortcolumn.startswith('-')
            if descending:
                sortcolumn = sortcolumn[1:]
            if sortcolumn not in sortable:
                sortcolumn = ""

        # per-column template values: the column currently sorted ascending
        # gets a '-' prefix, every other column is given unprefixed
        sort = [("sort_%s" % column,
                 "%s%s" % ((not descending and column == sortcolumn)
                            and "-" or "", column))
                for column in sortable]

        self.refresh()
        self.updatereqenv(wsgireq.env)

        entries = indexentries(self.ui, self.repos, wsgireq, req,
                               self.stripecount, sortcolumn=sortcolumn,
                               descending=descending, subdir=subdir)

        return tmpl("index", entries=entries, subdir=subdir,
                    pathdef=hgweb_mod.makebreadcrumb('/' + subdir, self.prefix),
                    sortcolumn=sortcolumn, descending=descending,
                    **dict(sort))

    def templater(self, wsgireq, nonce):
        """Build the templater used for index and error pages.

        Applies ``updatereqenv`` to ``wsgireq.env`` as a side effect.
        """

        def motd(**map):
            # an explicitly assigned self.motd wins over web.motd
            if self.motd is not None:
                yield self.motd
            else:
                yield config('web', 'motd')

        def config(section, name, default=uimod._unset, untrusted=True):
            return self.ui.config(section, name, default, untrusted)

        self.updatereqenv(wsgireq.env)

        url = wsgireq.env.get('SCRIPT_NAME', '')
        if not url.endswith('/'):
            url += '/'

        vars = {}
        styles, (style, mapfile) = hgweb_mod.getstyle(wsgireq.req, config,
                                                      self.templatepath)
        if style == styles[0]:
            vars['style'] = style

        sessionvars = webutil.sessionvars(vars, r'?')
        logourl = config('web', 'logourl')
        logoimg = config('web', 'logoimg')
        staticurl = (config('web', 'staticurl')
                     or wsgireq.req.apppath + '/static/')
        if not staticurl.endswith('/'):
            staticurl += '/'

        defaults = {
            "encoding": encoding.encoding,
            "motd": motd,
            "url": url,
            "logourl": logourl,
            "logoimg": logoimg,
            "staticurl": staticurl,
            "sessionvars": sessionvars,
            "style": style,
            "nonce": nonce,
        }
        tmpl = templater.templater.frommapfile(mapfile, defaults=defaults)
        return tmpl

    def updatereqenv(self, env):
        """Override SERVER_NAME, SERVER_PORT and SCRIPT_NAME in ``env`` from
        ``web.baseurl`` when that option is set. Mutates ``env`` in place."""
        if self._baseurl is not None:
            name, port, path = geturlcgivars(self._baseurl, env['SERVER_PORT'])
            env['SERVER_NAME'] = name
            env['SERVER_PORT'] = port
            env['SCRIPT_NAME'] = path
@@ -1,651 +1,662 b''
1 1 # hgweb/request.py - An http request from either CGI or the standalone server.
2 2 #
3 3 # Copyright 21 May 2005 - (c) 2005 Jake Edge <jake@edge2.net>
4 4 # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import errno
12 12 import socket
13 13 import wsgiref.headers as wsgiheaders
14 14 #import wsgiref.validate
15 15
16 16 from .common import (
17 17 ErrorResponse,
18 18 statusmessage,
19 19 )
20 20
21 21 from ..thirdparty import (
22 22 attr,
23 23 )
24 24 from .. import (
25 25 error,
26 26 pycompat,
27 27 util,
28 28 )
29 29
class multidict(object):
    """A dict like object that can store multiple values for a key.

    Used to store parsed request parameters.

    This is inspired by WebOb's class of the same name.
    """
    def __init__(self):
        # Stores (key, value) 2-tuples. This isn't the most efficient. But we
        # don't rely on parameters that much, so it shouldn't be a perf issue.
        # we can always add dict for fast lookups.
        self._items = []

    def __getitem__(self, key):
        """Returns the last set value for a key.

        Raises KeyError if the key has no value.
        """
        for k, v in reversed(self._items):
            if k == key:
                return v

        raise KeyError(key)

    def __setitem__(self, key, value):
        """Replace all values for a key with a single new value."""
        try:
            del self[key]
        except KeyError:
            pass

        self._items.append((key, value))

    def __delitem__(self, key):
        """Delete all values for a key.

        Raises KeyError if the key has no value.
        """
        oldlen = len(self._items)

        self._items[:] = [(k, v) for k, v in self._items if k != key]

        if oldlen == len(self._items):
            raise KeyError(key)

    def __contains__(self, key):
        return any(k == key for k, v in self._items)

    def __len__(self):
        """Number of stored (key, value) pairs, not of distinct keys."""
        return len(self._items)

    def get(self, key, default=None):
        """Return the last set value for a key, or ``default`` if unset."""
        try:
            return self[key]
        except KeyError:
            return default

    def add(self, key, value):
        """Add a new value for a key. Does not replace existing values."""
        self._items.append((key, value))

    def getall(self, key):
        """Obtains all values for a key, in insertion order."""
        return [v for k, v in self._items if k == key]

    def getone(self, key):
        """Obtain a single value for a key.

        Raises KeyError if key not defined or it has multiple values set.
        """
        vals = self.getall(key)

        if not vals:
            raise KeyError(key)

        if len(vals) > 1:
            # wrap the key in a 1-tuple so a key that is itself a tuple is
            # formatted as one value instead of being unpacked by ``%``
            raise KeyError('multiple values for %r' % (key,))

        return vals[0]

    def asdictoflists(self):
        """Return a plain dict mapping each key to the list of its values."""
        d = {}
        for k, v in self._items:
            d.setdefault(k, []).append(v)

        return d
113 113
@attr.s(frozen=True)
class parsedrequest(object):
    """Represents a parsed WSGI request.

    Contains both parsed parameters as well as a handle on the input stream.
    """

    # Request method.
    method = attr.ib()
    # Full URL for this request.
    url = attr.ib()
    # URL without any path components. Just <proto>://<host><port>.
    baseurl = attr.ib()
    # Advertised URL. Like ``url`` and ``baseurl`` but uses SERVER_NAME instead
    # of HTTP: Host header for hostname. This is likely what clients used.
    advertisedurl = attr.ib()
    advertisedbaseurl = attr.ib()
    # URL scheme (part before ``://``). e.g. ``http`` or ``https``.
    urlscheme = attr.ib()
    # Value of REMOTE_USER, if set, or None.
    remoteuser = attr.ib()
    # Value of REMOTE_HOST, if set, or None.
    remotehost = attr.ib()
    # WSGI application path.
    apppath = attr.ib()
    # List of path parts to be used for dispatch.
    dispatchparts = attr.ib()
    # URL path component (no query string) used for dispatch.
    dispatchpath = attr.ib()
    # Whether there is a path component to this request. This can be true
    # when ``dispatchpath`` is empty, because the leading path components
    # naming the repository are excluded from the dispatch path.
    havepathinfo = attr.ib()
    # The name of the repository being accessed, or None when the request
    # was not parsed relative to a repository.
    reponame = attr.ib()
    # Raw query string (part after "?" in URL).
    querystring = attr.ib()
    # multidict of query string parameters.
    qsparams = attr.ib()
    # wsgiref.headers.Headers instance. Operates like a dict with case
    # insensitive keys.
    headers = attr.ib()
    # Request body input stream.
    bodyfh = attr.ib()
157 157
def parserequestfromenv(env, bodyfh, reponame=None):
    """Parse URL components from environment variables.

    WSGI defines request attributes via environment variables. This function
    parses the environment variables into a data structure.

    If ``reponame`` is defined, the leading path components matching that
    string are effectively shifted from ``PATH_INFO`` to ``SCRIPT_NAME``.
    This simulates the world view of a WSGI application that processes
    requests from the base URL of a repo.

    ``bodyfh`` is stored on the result as the request body stream.

    Returns a ``parsedrequest``. Raises ``error.ProgrammingError`` when
    ``reponame`` is given but ``PATH_INFO`` is missing or empty, does not
    begin with the repo name, or the repo name does not end at a path
    delimiter within ``PATH_INFO``.
    """
    # PEP-0333 defines the WSGI spec and is a useful reference for this code.

    # We first validate that the incoming object conforms with the WSGI spec.
    # We only want to be dealing with spec-conforming WSGI implementations.
    # TODO enable this once we fix internal violations.
    #wsgiref.validate.check_environ(env)

    # PEP-0333 states that environment keys and values are native strings
    # (bytes on Python 2 and str on Python 3). The code points for the Unicode
    # strings on Python 3 must be between \00000-\000FF. We deal with bytes
    # in Mercurial, so mass convert string keys and values to bytes.
    # NOTE(review): plain Python 3 dicts have no ``iteritems``; presumably the
    # project's Python 3 support layer provides it -- confirm.
    if pycompat.ispy3:
        env = {k.encode('latin-1'): v for k, v in env.iteritems()}
        env = {k: v.encode('latin-1') if isinstance(v, str) else v
               for k, v in env.iteritems()}

    # https://www.python.org/dev/peps/pep-0333/#environ-variables defines
    # the environment variables.
    # https://www.python.org/dev/peps/pep-0333/#url-reconstruction defines
    # how URLs are reconstructed.
    fullurl = env['wsgi.url_scheme'] + '://'
    advertisedfullurl = fullurl

    def addport(s):
        # only spell out the port when it is not the scheme's default
        if env['wsgi.url_scheme'] == 'https':
            if env['SERVER_PORT'] != '443':
                s += ':' + env['SERVER_PORT']
        else:
            if env['SERVER_PORT'] != '80':
                s += ':' + env['SERVER_PORT']

        return s

    if env.get('HTTP_HOST'):
        fullurl += env['HTTP_HOST']
    else:
        fullurl += env['SERVER_NAME']
        fullurl = addport(fullurl)

    advertisedfullurl += env['SERVER_NAME']
    advertisedfullurl = addport(advertisedfullurl)

    baseurl = fullurl
    advertisedbaseurl = advertisedfullurl

    fullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
    advertisedfullurl += util.urlreq.quote(env.get('SCRIPT_NAME', ''))
    fullurl += util.urlreq.quote(env.get('PATH_INFO', ''))
    advertisedfullurl += util.urlreq.quote(env.get('PATH_INFO', ''))

    if env.get('QUERY_STRING'):
        fullurl += '?' + env['QUERY_STRING']
        advertisedfullurl += '?' + env['QUERY_STRING']

    # If ``reponame`` is defined, that must be a prefix on PATH_INFO
    # that represents the repository being dispatched to. When computing
    # the dispatch info, we ignore these leading path components.

    apppath = env.get('SCRIPT_NAME', '')

    if reponame:
        repoprefix = '/' + reponame.strip('/')

        if not env.get('PATH_INFO'):
            raise error.ProgrammingError('reponame requires PATH_INFO')

        if not env['PATH_INFO'].startswith(repoprefix):
            raise error.ProgrammingError('PATH_INFO does not begin with repo '
                                         'name: %s (%s)' % (env['PATH_INFO'],
                                                            reponame))

        dispatchpath = env['PATH_INFO'][len(repoprefix):]

        if dispatchpath and not dispatchpath.startswith('/'):
            raise error.ProgrammingError('reponame prefix of PATH_INFO does '
                                         'not end at path delimiter: %s (%s)' %
                                         (env['PATH_INFO'], reponame))

        apppath = apppath.rstrip('/') + repoprefix
        # ''.split('/') is [''], so only split when something is left after
        # the repo prefix; a request for the repo root has no dispatch parts,
        # matching the non-repo branches below
        remainder = dispatchpath.strip('/')
        dispatchparts = remainder.split('/') if remainder else []
    elif env.get('PATH_INFO', '').strip('/'):
        dispatchparts = env['PATH_INFO'].strip('/').split('/')
    else:
        dispatchparts = []

    dispatchpath = '/'.join(dispatchparts)

    querystring = env.get('QUERY_STRING', '')

    # We store as a list so we have ordering information. We also store as
    # a dict to facilitate fast lookup.
    qsparams = multidict()
    for k, v in util.urlreq.parseqsl(querystring, keep_blank_values=True):
        qsparams.add(k, v)

    # HTTP_* keys contain HTTP request headers. The Headers structure should
    # perform case normalization for us. We just rewrite underscore to dash
    # so keys match what likely went over the wire.
    headers = []
    for k, v in env.iteritems():
        if k.startswith('HTTP_'):
            headers.append((k[len('HTTP_'):].replace('_', '-'), v))

    headers = wsgiheaders.Headers(headers)

    # This is kind of a lie because the HTTP header wasn't explicitly
    # sent. But for all intents and purposes it should be OK to lie about
    # this, since a consumer will use either value to determine how many
    # bytes are available to read.
    if 'CONTENT_LENGTH' in env and 'HTTP_CONTENT_LENGTH' not in env:
        headers['Content-Length'] = env['CONTENT_LENGTH']

    # TODO do this once we remove wsgirequest.inp, otherwise we could have
    # multiple readers from the underlying input stream.
    #bodyfh = env['wsgi.input']
    #if 'Content-Length' in headers:
    #    bodyfh = util.cappedreader(bodyfh, int(headers['Content-Length']))

    return parsedrequest(method=env['REQUEST_METHOD'],
                         url=fullurl, baseurl=baseurl,
                         advertisedurl=advertisedfullurl,
                         advertisedbaseurl=advertisedbaseurl,
                         urlscheme=env['wsgi.url_scheme'],
                         remoteuser=env.get('REMOTE_USER'),
                         remotehost=env.get('REMOTE_HOST'),
                         apppath=apppath,
                         dispatchparts=dispatchparts, dispatchpath=dispatchpath,
                         havepathinfo='PATH_INFO' in env,
                         reponame=reponame,
                         querystring=querystring,
                         qsparams=qsparams,
                         headers=headers,
                         bodyfh=bodyfh)
291 302
class offsettrackingwriter(object):
    """An append-only, file-like wrapper around a write callable.

    Every ``write()`` is forwarded to the bound callable, and the amount of
    data written is accumulated so that ``tell()`` can report it.

    The intent of this class is to wrap the ``write()`` function returned by
    a WSGI ``start_response()`` function. Since ``write()`` is a callable and
    not a file object, it doesn't implement other file object methods.
    """
    def __init__(self, writefn):
        self._offset = 0
        self._write = writefn

    def write(self, s):
        """Forward ``s`` to the wrapped callable and advance the offset."""
        written = self._write(s)
        # Some Python objects don't report the number of bytes written;
        # assume everything was written in that case.
        self._offset += len(s) if written is None else written

    def flush(self):
        """No-op; present for file object compatibility."""
        pass

    def tell(self):
        """Return the total amount of data written so far."""
        return self._offset
322 333
class wsgiresponse(object):
    """Represents a response to a WSGI request.

    A response consists of a status line, headers, and a body.

    Consumers must populate the ``status`` and ``headers`` fields and
    make a call to a ``setbody*()`` method before the response can be
    issued.

    When it is time to start sending the response over the wire,
    ``sendresponse()`` is called. It handles emitting the header portion
    of the response message. It then yields chunks of body data to be
    written to the peer. Typically, the WSGI application itself calls
    and returns the value from ``sendresponse()``.
    """

    def __init__(self, req, startresponse):
        """Create an empty response tied to a specific request.

        ``req`` is a ``parsedrequest``. ``startresponse`` is the
        ``start_response`` function passed to the WSGI application.
        """
        self._req = req
        self._startresponse = startresponse

        self.status = None
        self.headers = wsgiheaders.Headers([])

        # At most one of these three is populated, by the matching
        # ``setbody*()`` call (enforced by ``_verifybody()``).
        self._bodybytes = None
        self._bodygen = None
        self._bodywillwrite = False
        # Set once ``sendresponse()`` has begun executing.
        self._started = False
        # The ``write()`` callable from ``start_response()``; only retained
        # when the body is produced through ``getbodyfile()``.
        self._bodywritefn = None

    def _verifybody(self):
        """Raise ``ProgrammingError`` if a body has already been defined."""
        if (self._bodybytes is not None or self._bodygen is not None
            or self._bodywillwrite):
            raise error.ProgrammingError('cannot define body multiple times')

    def setbodybytes(self, b):
        """Define the response body as static bytes.

        The empty string signals that there is no response body.

        Also sets the ``Content-Length`` header to the length of ``b``.
        """
        self._verifybody()
        self._bodybytes = b
        self.headers['Content-Length'] = '%d' % len(b)

    def setbodygen(self, gen):
        """Define the response body as a generator of bytes."""
        self._verifybody()
        self._bodygen = gen

    def setbodywillwrite(self):
        """Signal an intent to use write() to emit the response body.

        **This is the least preferred way to send a body.**

        It is preferred for WSGI applications to emit a generator of chunks
        constituting the response body. However, some consumers can't emit
        data this way. So, WSGI provides a way to obtain a ``write(data)``
        function that can be used to synchronously perform an unbuffered
        write.

        Calling this function signals an intent to produce the body in this
        manner.
        """
        self._verifybody()
        self._bodywillwrite = True

    def sendresponse(self):
        """Send the generated response to the client.

        Before this is called, ``status`` must be set and one of
        ``setbodybytes()``, ``setbodygen()`` or ``setbodywillwrite()`` must
        have been called.

        This is a generator: nothing happens until it is advanced. It yields
        the chunks of the response body (nothing at all for an empty body or
        for a ``setbodywillwrite()`` response).

        Calling this method multiple times is not allowed.

        Raises ``error.ProgrammingError`` when called twice, when the status
        or body is undefined, or when a 304 response carries a disallowed
        header or a non-bytes body.
        """
        if self._started:
            raise error.ProgrammingError('sendresponse() called multiple times')

        self._started = True

        if not self.status:
            raise error.ProgrammingError('status line not defined')

        if (self._bodybytes is None and self._bodygen is None
            and not self._bodywillwrite):
            raise error.ProgrammingError('response body not defined')

        # RFC 7232 Section 4.1 states that a 304 MUST generate one of
        # {Cache-Control, Content-Location, Date, ETag, Expires, Vary}
        # and SHOULD NOT generate other headers unless they could be used
        # to guide cache updates. Furthermore, RFC 7230 Section 3.3.2
        # states that no response body can be issued. Content-Length can
        # be sent. But if it is present, it should be the size of the response
        # that wasn't transferred.
        if self.status.startswith('304 '):
            # setbodybytes('') will set C-L to 0. This doesn't conform with the
            # spec. So remove it.
            if self.headers.get('Content-Length') == '0':
                del self.headers['Content-Length']

            # Strictly speaking, this is too strict. But until it causes
            # problems, let's be strict.
            badheaders = {k for k in self.headers.keys()
                          if k.lower() not in ('date', 'etag', 'expires',
                                               'cache-control',
                                               'content-location',
                                               'vary')}
            if badheaders:
                raise error.ProgrammingError(
                    'illegal header on 304 response: %s' %
                    ', '.join(sorted(badheaders)))

            if self._bodygen is not None or self._bodywillwrite:
                raise error.ProgrammingError("must use setbodybytes('') with "
                                             "304 responses")

        # Various HTTP clients (notably httplib) won't read the HTTP response
        # until the HTTP request has been sent in full. If servers (us) send a
        # response before the HTTP request has been fully sent, the connection
        # may deadlock because neither end is reading.
        #
        # We work around this by "draining" the request data before
        # sending any response in some conditions.
        drain = False
        close = False

        # If the client sent Expect: 100-continue, we assume it is smart enough
        # to deal with the server sending a response before reading the request.
        # (httplib doesn't do this.)
        if self._req.headers.get('Expect', '').lower() == '100-continue':
            pass
        # Only tend to request methods that have bodies. Strictly speaking,
        # we should sniff for a body. But this is fine for our existing
        # WSGI applications.
        elif self._req.method not in ('POST', 'PUT'):
            pass
        else:
            # If we don't know how much data to read, there's no guarantee
            # that we can drain the request responsibly. The WSGI
            # specification only says that servers *should* ensure the
            # input stream doesn't overrun the actual request. So there's
            # no guarantee that reading until EOF won't corrupt the stream
            # state.
            if not isinstance(self._req.bodyfh, util.cappedreader):
                close = True
            else:
                # We /could/ only drain certain HTTP response codes. But 200 and
                # non-200 wire protocol responses both require draining. Since
                # we have a capped reader in place for all situations where we
                # drain, it is safe to read from that stream. We'll either do
                # a drain or no-op if we're already at EOF.
                drain = True

        if close:
            self.headers['Connection'] = 'Close'

        if drain:
            assert isinstance(self._req.bodyfh, util.cappedreader)
            while True:
                chunk = self._req.bodyfh.read(32768)
                if not chunk:
                    break

        write = self._startresponse(pycompat.sysstr(self.status),
                                    self.headers.items())

        # Dispatch on "is not None" rather than truthiness: setbodybytes('')
        # is the documented way to send an empty body, and it must not be
        # mistaken for "no body defined" now that the fallback really raises.
        if self._bodybytes is not None:
            if self._bodybytes:
                yield self._bodybytes
        elif self._bodygen is not None:
            for chunk in self._bodygen:
                yield chunk
        elif self._bodywillwrite:
            self._bodywritefn = write
        else:
            # Unreachable given the "response body not defined" check above;
            # kept as a guard against future body kinds being added there
            # but not here.
            raise error.ProgrammingError('do not know how to send body')

    def getbodyfile(self):
        """Obtain a file object like object representing the response body.

        For this to work, you must call ``setbodywillwrite()`` and then
        ``sendresponse()`` first. ``sendresponse()`` is a generator and the
        function won't run to completion unless the generator is advanced. The
        generator yields no items. The easiest way to consume it is with
        ``list(res.sendresponse())``, which should resolve to an empty list -
        ``[]``.

        Raises ``error.ProgrammingError`` if either prerequisite call has not
        been made.
        """
        if not self._bodywillwrite:
            raise error.ProgrammingError('must call setbodywillwrite() first')

        if not self._started:
            raise error.ProgrammingError('must call sendresponse() first; did '
                                         'you remember to consume it since it '
                                         'is a generator?')

        assert self._bodywritefn
        return offsettrackingwriter(self._bodywritefn)
522 533
class wsgirequest(object):
    """Higher-level API for a WSGI request.

    WSGI applications are invoked with 2 arguments. They are used to
    instantiate instances of this class, which provides higher-level APIs
    for obtaining request parameters, writing HTTP output, etc.
    """
    def __init__(self, wsgienv, start_response):
        """Wrap a WSGI environment and its ``start_response`` callable.

        Raises ``RuntimeError`` unless ``wsgi.version`` is a 1.x version.
        """
        version = wsgienv[r'wsgi.version']
        if (version < (1, 0)) or (version >= (2, 0)):
            raise RuntimeError("Unknown and unsupported WSGI version %d.%d"
                               % version)

        inp = wsgienv[r'wsgi.input']

        # When the request length is known, cap the input stream so reads
        # cannot run past the end of this request's body.
        if r'HTTP_CONTENT_LENGTH' in wsgienv:
            inp = util.cappedreader(inp, int(wsgienv[r'HTTP_CONTENT_LENGTH']))
        elif r'CONTENT_LENGTH' in wsgienv:
            inp = util.cappedreader(inp, int(wsgienv[r'CONTENT_LENGTH']))

        self.err = wsgienv[r'wsgi.errors']
        self.threaded = wsgienv[r'wsgi.multithread']
        self.multiprocess = wsgienv[r'wsgi.multiprocess']
        self.run_once = wsgienv[r'wsgi.run_once']
        self.env = wsgienv
        self.req = parserequestfromenv(wsgienv, inp)
        self.res = wsgiresponse(self.req, start_response)
        # Reset to None by respond() once the response has been started.
        self._start_response = start_response
        self.server_write = None
        self.headers = []

    def respond(self, status, type, filename=None, body=None):
        """Start the response (once) and optionally write ``body``.

        ``status`` is an ``ErrorResponse``, an integer status code, or an
        already formatted status line. ``type`` is the Content-Type value.
        ``filename``, when given, is advertised through a
        Content-Disposition header. ``body``, when not None, is written
        immediately and its length sent as Content-Length.

        Headers are only emitted on the first call; later calls just write
        ``body``. Raises ``TypeError`` if a queued header value is not a
        ``str``.
        """
        if not isinstance(type, str):
            type = pycompat.sysstr(type)
        if self._start_response is not None:
            self.headers.append((r'Content-Type', type))
            if filename:
                # Keep only the last path component and escape backslashes
                # and double quotes so the quoted-string stays well formed.
                filename = (filename.rpartition('/')[-1]
                            .replace('\\', '\\\\').replace('"', '\\"'))
                self.headers.append(('Content-Disposition',
                                     'inline; filename="%s"' % filename))
            if body is not None:
                self.headers.append((r'Content-Length', str(len(body))))

            for k, v in self.headers:
                if not isinstance(v, str):
                    raise TypeError('header value must be string: %r' % (v,))

            if isinstance(status, ErrorResponse):
                self.headers.extend(status.headers)
                status = statusmessage(status.code, pycompat.bytestr(status))
            elif status == 200:
                status = '200 Script output follows'
            elif isinstance(status, int):
                status = statusmessage(status)

            # Various HTTP clients (notably httplib) won't read the HTTP
            # response until the HTTP request has been sent in full. If servers
            # (us) send a response before the HTTP request has been fully sent,
            # the connection may deadlock because neither end is reading.
            #
            # We work around this by "draining" the request data before
            # sending any response in some conditions.
            drain = False
            close = False

            # If the client sent Expect: 100-continue, we assume it is smart
            # enough to deal with the server sending a response before reading
            # the request. (httplib doesn't do this.)
            if self.env.get(r'HTTP_EXPECT', r'').lower() == r'100-continue':
                pass
            # Only tend to request methods that have bodies. Strictly speaking,
            # we should sniff for a body. But this is fine for our existing
            # WSGI applications.
            elif self.env[r'REQUEST_METHOD'] not in (r'POST', r'PUT'):
                pass
            else:
                # If we don't know how much data to read, there's no guarantee
                # that we can drain the request responsibly. The WSGI
                # specification only says that servers *should* ensure the
                # input stream doesn't overrun the actual request. So there's
                # no guarantee that reading until EOF won't corrupt the stream
                # state.
                if not isinstance(self.req.bodyfh, util.cappedreader):
                    close = True
                else:
                    # We /could/ only drain certain HTTP response codes. But 200
                    # and non-200 wire protocol responses both require draining.
                    # Since we have a capped reader in place for all situations
                    # where we drain, it is safe to read from that stream. We'll
                    # either do a drain or no-op if we're already at EOF.
                    drain = True

            if close:
                self.headers.append((r'Connection', r'Close'))

            if drain:
                assert isinstance(self.req.bodyfh, util.cappedreader)
                while True:
                    chunk = self.req.bodyfh.read(32768)
                    if not chunk:
                        break

            self.server_write = self._start_response(
                pycompat.sysstr(status), self.headers)
            self._start_response = None
            self.headers = []
        if body is not None:
            self.write(body)
            self.server_write = None

    def write(self, thing):
        """Write ``thing`` to the client; empty values are ignored.

        A connection reset by the peer is silently swallowed. Any other
        socket error propagates to the caller.
        """
        if thing:
            try:
                self.server_write(thing)
            except socket.error as inst:
                # Use the ``errno`` attribute rather than ``inst[0]``:
                # exceptions are not subscriptable on Python 3, so indexing
                # would turn a harmless reset into a TypeError.
                if inst.errno != errno.ECONNRESET:
                    raise

    def flush(self):
        """Do nothing; present for file object compatibility."""
        return None
644 655
def wsgiapplication(app_maker):
    '''Build a WSGI callable from an application factory.

    Kept for compatibility with old CGI scripts; a plain hgweb() or
    hgwebdir() can and should now be used as a WSGI application directly.

    ``app_maker`` is called exactly once, immediately. The returned callable
    forwards ``(env, respond)`` to the application it produced.
    '''
    wsgiapp = app_maker()

    def run_wsgi(env, respond):
        result = wsgiapp(env, respond)
        return result

    return run_wsgi
@@ -1,255 +1,259 b''
1 1 from __future__ import absolute_import, print_function
2 2
3 3 import unittest
4 4
5 5 from mercurial.hgweb import (
6 6 request as requestmod,
7 7 )
8 from mercurial import (
9 error,
10 )
8 11
# Baseline WSGI environment used by the tests below. ``parse()`` copies it
# and layers any per-test ``extra`` keys on top, so it is never mutated.
DEFAULT_ENV = {
    # CGI-style request variables.
    r'REQUEST_METHOD': r'GET',
    r'SERVER_NAME': r'testserver',
    r'SERVER_PORT': r'80',
    r'SERVER_PROTOCOL': r'http',
    # WSGI-specific variables.
    r'wsgi.version': (1, 0),
    r'wsgi.url_scheme': r'http',
    r'wsgi.input': None,
    r'wsgi.errors': None,
    r'wsgi.multithread': False,
    r'wsgi.multiprocess': True,
    r'wsgi.run_once': False,
}
22 25
def parse(env, bodyfh=None, reponame=None, extra=None):
    """Parse a WSGI environment into a request object.

    ``env`` is copied, never mutated; the keys in ``extra`` (if any) are
    applied on top of the copy. ``bodyfh`` and ``reponame`` are passed
    through to ``requestmod.parserequestfromenv()``.
    """
    merged = dict(env)
    if extra:
        merged.update(extra)

    return requestmod.parserequestfromenv(merged, bodyfh, reponame=reponame)
28 31
29 32 class ParseRequestTests(unittest.TestCase):
30 33 def testdefault(self):
31 34 r = parse(DEFAULT_ENV)
32 35 self.assertEqual(r.url, b'http://testserver')
33 36 self.assertEqual(r.baseurl, b'http://testserver')
34 37 self.assertEqual(r.advertisedurl, b'http://testserver')
35 38 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
36 39 self.assertEqual(r.urlscheme, b'http')
37 40 self.assertEqual(r.method, b'GET')
38 41 self.assertIsNone(r.remoteuser)
39 42 self.assertIsNone(r.remotehost)
40 43 self.assertEqual(r.apppath, b'')
41 44 self.assertEqual(r.dispatchparts, [])
42 45 self.assertEqual(r.dispatchpath, b'')
43 46 self.assertFalse(r.havepathinfo)
44 47 self.assertIsNone(r.reponame)
45 48 self.assertEqual(r.querystring, b'')
46 49 self.assertEqual(len(r.qsparams), 0)
47 50 self.assertEqual(len(r.headers), 0)
48 51
49 52 def testcustomport(self):
50 53 r = parse(DEFAULT_ENV, extra={
51 54 r'SERVER_PORT': r'8000',
52 55 })
53 56
54 57 self.assertEqual(r.url, b'http://testserver:8000')
55 58 self.assertEqual(r.baseurl, b'http://testserver:8000')
56 59 self.assertEqual(r.advertisedurl, b'http://testserver:8000')
57 60 self.assertEqual(r.advertisedbaseurl, b'http://testserver:8000')
58 61
59 62 r = parse(DEFAULT_ENV, extra={
60 63 r'SERVER_PORT': r'4000',
61 64 r'wsgi.url_scheme': r'https',
62 65 })
63 66
64 67 self.assertEqual(r.url, b'https://testserver:4000')
65 68 self.assertEqual(r.baseurl, b'https://testserver:4000')
66 69 self.assertEqual(r.advertisedurl, b'https://testserver:4000')
67 70 self.assertEqual(r.advertisedbaseurl, b'https://testserver:4000')
68 71
69 72 def testhttphost(self):
70 73 r = parse(DEFAULT_ENV, extra={
71 74 r'HTTP_HOST': r'altserver',
72 75 })
73 76
74 77 self.assertEqual(r.url, b'http://altserver')
75 78 self.assertEqual(r.baseurl, b'http://altserver')
76 79 self.assertEqual(r.advertisedurl, b'http://testserver')
77 80 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
78 81
79 82 def testscriptname(self):
80 83 r = parse(DEFAULT_ENV, extra={
81 84 r'SCRIPT_NAME': r'',
82 85 })
83 86
84 87 self.assertEqual(r.url, b'http://testserver')
85 88 self.assertEqual(r.baseurl, b'http://testserver')
86 89 self.assertEqual(r.advertisedurl, b'http://testserver')
87 90 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
88 91 self.assertEqual(r.apppath, b'')
89 92 self.assertEqual(r.dispatchparts, [])
90 93 self.assertEqual(r.dispatchpath, b'')
91 94 self.assertFalse(r.havepathinfo)
92 95
93 96 r = parse(DEFAULT_ENV, extra={
94 97 r'SCRIPT_NAME': r'/script',
95 98 })
96 99
97 100 self.assertEqual(r.url, b'http://testserver/script')
98 101 self.assertEqual(r.baseurl, b'http://testserver')
99 102 self.assertEqual(r.advertisedurl, b'http://testserver/script')
100 103 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
101 104 self.assertEqual(r.apppath, b'/script')
102 105 self.assertEqual(r.dispatchparts, [])
103 106 self.assertEqual(r.dispatchpath, b'')
104 107 self.assertFalse(r.havepathinfo)
105 108
106 109 r = parse(DEFAULT_ENV, extra={
107 110 r'SCRIPT_NAME': r'/multiple words',
108 111 })
109 112
110 113 self.assertEqual(r.url, b'http://testserver/multiple%20words')
111 114 self.assertEqual(r.baseurl, b'http://testserver')
112 115 self.assertEqual(r.advertisedurl, b'http://testserver/multiple%20words')
113 116 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
114 117 self.assertEqual(r.apppath, b'/multiple words')
115 118 self.assertEqual(r.dispatchparts, [])
116 119 self.assertEqual(r.dispatchpath, b'')
117 120 self.assertFalse(r.havepathinfo)
118 121
119 122 def testpathinfo(self):
120 123 r = parse(DEFAULT_ENV, extra={
121 124 r'PATH_INFO': r'',
122 125 })
123 126
124 127 self.assertEqual(r.url, b'http://testserver')
125 128 self.assertEqual(r.baseurl, b'http://testserver')
126 129 self.assertEqual(r.advertisedurl, b'http://testserver')
127 130 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
128 131 self.assertEqual(r.apppath, b'')
129 132 self.assertEqual(r.dispatchparts, [])
130 133 self.assertEqual(r.dispatchpath, b'')
131 134 self.assertTrue(r.havepathinfo)
132 135
133 136 r = parse(DEFAULT_ENV, extra={
134 137 r'PATH_INFO': r'/pathinfo',
135 138 })
136 139
137 140 self.assertEqual(r.url, b'http://testserver/pathinfo')
138 141 self.assertEqual(r.baseurl, b'http://testserver')
139 142 self.assertEqual(r.advertisedurl, b'http://testserver/pathinfo')
140 143 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
141 144 self.assertEqual(r.apppath, b'')
142 145 self.assertEqual(r.dispatchparts, [b'pathinfo'])
143 146 self.assertEqual(r.dispatchpath, b'pathinfo')
144 147 self.assertTrue(r.havepathinfo)
145 148
146 149 r = parse(DEFAULT_ENV, extra={
147 150 r'PATH_INFO': r'/one/two/',
148 151 })
149 152
150 153 self.assertEqual(r.url, b'http://testserver/one/two/')
151 154 self.assertEqual(r.baseurl, b'http://testserver')
152 155 self.assertEqual(r.advertisedurl, b'http://testserver/one/two/')
153 156 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
154 157 self.assertEqual(r.apppath, b'')
155 158 self.assertEqual(r.dispatchparts, [b'one', b'two'])
156 159 self.assertEqual(r.dispatchpath, b'one/two')
157 160 self.assertTrue(r.havepathinfo)
158 161
159 162 def testscriptandpathinfo(self):
160 163 r = parse(DEFAULT_ENV, extra={
161 164 r'SCRIPT_NAME': r'/script',
162 165 r'PATH_INFO': r'/pathinfo',
163 166 })
164 167
165 168 self.assertEqual(r.url, b'http://testserver/script/pathinfo')
166 169 self.assertEqual(r.baseurl, b'http://testserver')
167 170 self.assertEqual(r.advertisedurl, b'http://testserver/script/pathinfo')
168 171 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
169 172 self.assertEqual(r.apppath, b'/script')
170 173 self.assertEqual(r.dispatchparts, [b'pathinfo'])
171 174 self.assertEqual(r.dispatchpath, b'pathinfo')
172 175 self.assertTrue(r.havepathinfo)
173 176
174 177 r = parse(DEFAULT_ENV, extra={
175 178 r'SCRIPT_NAME': r'/script1/script2',
176 179 r'PATH_INFO': r'/path1/path2',
177 180 })
178 181
179 182 self.assertEqual(r.url,
180 183 b'http://testserver/script1/script2/path1/path2')
181 184 self.assertEqual(r.baseurl, b'http://testserver')
182 185 self.assertEqual(r.advertisedurl,
183 186 b'http://testserver/script1/script2/path1/path2')
184 187 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
185 188 self.assertEqual(r.apppath, b'/script1/script2')
186 189 self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
187 190 self.assertEqual(r.dispatchpath, b'path1/path2')
188 191 self.assertTrue(r.havepathinfo)
189 192
190 193 r = parse(DEFAULT_ENV, extra={
191 194 r'HTTP_HOST': r'hostserver',
192 195 r'SCRIPT_NAME': r'/script',
193 196 r'PATH_INFO': r'/pathinfo',
194 197 })
195 198
196 199 self.assertEqual(r.url, b'http://hostserver/script/pathinfo')
197 200 self.assertEqual(r.baseurl, b'http://hostserver')
198 201 self.assertEqual(r.advertisedurl, b'http://testserver/script/pathinfo')
199 202 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
200 203 self.assertEqual(r.apppath, b'/script')
201 204 self.assertEqual(r.dispatchparts, [b'pathinfo'])
202 205 self.assertEqual(r.dispatchpath, b'pathinfo')
203 206 self.assertTrue(r.havepathinfo)
204 207
205 208 def testreponame(self):
206 """REPO_NAME path components get stripped from URL."""
207 r = parse(DEFAULT_ENV, extra={
208 r'REPO_NAME': r'repo',
209 r'PATH_INFO': r'/path1/path2'
210 })
209 """repository path components get stripped from URL."""
210
211 with self.assertRaisesRegexp(error.ProgrammingError,
212 b'reponame requires PATH_INFO'):
213 parse(DEFAULT_ENV, reponame=b'repo')
211 214
212 self.assertEqual(r.url, b'http://testserver/path1/path2')
213 self.assertEqual(r.baseurl, b'http://testserver')
214 self.assertEqual(r.advertisedurl, b'http://testserver/path1/path2')
215 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
216 self.assertEqual(r.apppath, b'/repo')
217 self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
218 self.assertEqual(r.dispatchpath, b'path1/path2')
219 self.assertTrue(r.havepathinfo)
220 self.assertEqual(r.reponame, b'repo')
215 with self.assertRaisesRegexp(error.ProgrammingError,
216 b'PATH_INFO does not begin with repo '
217 b'name'):
218 parse(DEFAULT_ENV, reponame=b'repo', extra={
219 r'PATH_INFO': r'/pathinfo',
220 })
221 221
222 r = parse(DEFAULT_ENV, extra={
223 r'REPO_NAME': r'repo',
222 with self.assertRaisesRegexp(error.ProgrammingError,
223 b'reponame prefix of PATH_INFO'):
224 parse(DEFAULT_ENV, reponame=b'repo', extra={
225 r'PATH_INFO': r'/repoextra/path',
226 })
227
228 r = parse(DEFAULT_ENV, reponame=b'repo', extra={
224 229 r'PATH_INFO': r'/repo/path1/path2',
225 230 })
226 231
227 232 self.assertEqual(r.url, b'http://testserver/repo/path1/path2')
228 233 self.assertEqual(r.baseurl, b'http://testserver')
229 234 self.assertEqual(r.advertisedurl, b'http://testserver/repo/path1/path2')
230 235 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
231 236 self.assertEqual(r.apppath, b'/repo')
232 237 self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
233 238 self.assertEqual(r.dispatchpath, b'path1/path2')
234 239 self.assertTrue(r.havepathinfo)
235 240 self.assertEqual(r.reponame, b'repo')
236 241
237 r = parse(DEFAULT_ENV, extra={
238 r'REPO_NAME': r'prefix/repo',
242 r = parse(DEFAULT_ENV, reponame=b'prefix/repo', extra={
239 243 r'PATH_INFO': r'/prefix/repo/path1/path2',
240 244 })
241 245
242 246 self.assertEqual(r.url, b'http://testserver/prefix/repo/path1/path2')
243 247 self.assertEqual(r.baseurl, b'http://testserver')
244 248 self.assertEqual(r.advertisedurl,
245 249 b'http://testserver/prefix/repo/path1/path2')
246 250 self.assertEqual(r.advertisedbaseurl, b'http://testserver')
247 251 self.assertEqual(r.apppath, b'/prefix/repo')
248 252 self.assertEqual(r.dispatchparts, [b'path1', b'path2'])
249 253 self.assertEqual(r.dispatchpath, b'path1/path2')
250 254 self.assertTrue(r.havepathinfo)
251 255 self.assertEqual(r.reponame, b'prefix/repo')
252 256
if __name__ == '__main__':
    # Run the suite only when executed as a script, not when imported.
    import silenttestrunner as testrunner
    testrunner.main(__name__)
General Comments 0
You need to be logged in to leave comments. Login now