# HG changeset patch
# User Gregory Szorc
# Date 2018-03-08 19:21:46
# Node ID 3c15b84ab66caa4ddba394beb92fed9de2caead2
# Parent 0031e972ded2343df2abba928ffa3cfbf8db1f50
hgweb: teach WSGI parser about query strings

Currently, req.form uses cgi.parse() to populate form data. Depending
on the request, form data can come from POST multipart/form-data,
application/x-www-form-urlencoded, or the URL query string. Putting
all these things into one data structure makes it difficult to
reason about how exactly parameters got to the request. It can lead
to wonkiness such as pulling parameters from both the URL and
POST data.

This commit teaches our WSGI request parser about argument data
in query strings. We populate fields containing the query string
data and only the query string data so it can't be confused with
POST data.

Differential Revision: https://phab.mercurial-scm.org/D2737

diff --git a/mercurial/hgweb/request.py b/mercurial/hgweb/request.py
--- a/mercurial/hgweb/request.py
+++ b/mercurial/hgweb/request.py
@@ -78,6 +78,10 @@ class parsedrequest(object):
     dispatchpath = attr.ib()
     # Raw query string (part after "?" in URL).
     querystring = attr.ib()
+    # List of 2-tuples of query string arguments.
+    querystringlist = attr.ib()
+    # Dict of query string arguments. Values are lists with at least 1 item.
+    querystringdict = attr.ib()
 
 def parserequestfromenv(env):
     """Parse URL components from environment variables.
@@ -168,12 +172,25 @@ def parserequestfromenv(env):
 
     querystring = env.get('QUERY_STRING', '')
 
+    # We store as a list so we have ordering information. We also store as
+    # a dict to facilitate fast lookup.
+    querystringlist = util.urlreq.parseqsl(querystring, keep_blank_values=True)
+
+    querystringdict = {}
+    for k, v in querystringlist:
+        if k in querystringdict:
+            querystringdict[k].append(v)
+        else:
+            querystringdict[k] = [v]
+
     return parsedrequest(url=fullurl, baseurl=baseurl,
                          advertisedurl=advertisedfullurl,
                          advertisedbaseurl=advertisedbaseurl,
                          apppath=apppath,
                          dispatchparts=dispatchparts, dispatchpath=dispatchpath,
-                         querystring=querystring)
+                         querystring=querystring,
+                         querystringlist=querystringlist,
+                         querystringdict=querystringdict)
 
 class wsgirequest(object):
     """Higher-level API for a WSGI request.
diff --git a/mercurial/urllibcompat.py b/mercurial/urllibcompat.py
--- a/mercurial/urllibcompat.py
+++ b/mercurial/urllibcompat.py
@@ -48,6 +48,7 @@ if pycompat.ispy3:
         "urlunparse",
     ))
     urlreq._registeralias(urllib.parse, "parse_qs", "parseqs")
+    urlreq._registeralias(urllib.parse, "parse_qsl", "parseqsl")
     urlreq._registeralias(urllib.parse, "unquote_to_bytes", "unquote")
     import urllib.request
     urlreq._registeraliases(urllib.request, (
@@ -159,6 +160,7 @@ else:
         "urlunparse",
     ))
     urlreq._registeralias(urlparse, "parse_qs", "parseqs")
+    urlreq._registeralias(urlparse, "parse_qsl", "parseqsl")
     urlerr._registeraliases(urllib2, (
         "HTTPError",
         "URLError",