# HG changeset patch # User Matt Mackall # Date 2011-07-30 23:46:52 # Node ID d06b9c55ddabed66f7e1b5f4193534957232de95 # Parent dd74cd1e5d49cdedfd2a0cf142a9ce1178e4b748 hgweb: raw file mimetype guessing configurable, off by default (BC) (issue2923) Before: hgweb made it possible to download file content with a content type detected from the file extension. It would serve .html files as text/html and could thus cause XSS vulnerabilities if the web site had any kind of session authorization and the repository content wasn't fully trusted. Now: all files default to "application/binary", which all important browsers will refuse to treat as text/html. See the table here: https://code.google.com/p/browsersec/wiki/Part2#Survey_of_content_sniffing_behaviors diff --git a/mercurial/help/config.txt b/mercurial/help/config.txt --- a/mercurial/help/config.txt +++ b/mercurial/help/config.txt @@ -1154,6 +1154,13 @@ The full set of options is: be present in this list. The contents of the allow_push list are examined after the deny_push list. +``guessmime`` + Control MIME types for raw download of file content. + Set to True to let hgweb guess the content type from the file + extension. This will serve HTML files as ``text/html`` and might + allow cross-site scripting attacks when serving untrusted + repositories. Default is False. + ``allow_read`` If the user has not already been denied repository access due to the contents of deny_read, this list determines whether to grant diff --git a/mercurial/hgweb/webcommands.py b/mercurial/hgweb/webcommands.py --- a/mercurial/hgweb/webcommands.py +++ b/mercurial/hgweb/webcommands.py @@ -32,6 +32,8 @@ def log(web, req, tmpl): return changelog(web, req, tmpl) def rawfile(web, req, tmpl): + guessmime = web.configbool('web', 'guessmime', False) + path = webutil.cleanpath(web.repo, req.form.get('file', [''])[0]) if not path: content = manifest(web, req, tmpl) @@ -50,9 +52,11 @@ def rawfile(web, req, tmpl): path = fctx.path() text = fctx.data() - mt = mimetypes.guess_type(path)[0] - if mt is None: - mt = binary(text) and 'application/octet-stream' or 'text/plain' + mt = 'application/binary' + if guessmime: + mt = mimetypes.guess_type(path)[0] + if mt is None: + mt = binary(text) and 'application/binary' or 'text/plain' if mt.startswith('text/'): mt += '; charset="%s"' % encoding.encoding diff --git a/tests/test-hgweb-raw.t b/tests/test-hgweb-raw.t --- a/tests/test-hgweb-raw.t +++ b/tests/test-hgweb-raw.t @@ -22,6 +22,28 @@ Test raw style of hgweb $ sleep 1 # wait for server to scream and die $ cat getoutput.txt 200 Script output follows + content-type: application/binary + content-length: 157 + content-disposition: inline; filename="some \"text\".txt" + + This is just some random text + that will go inside the file and take a few lines. + It is very boring to read, but computers don't + care about things like that. + $ cat access.log error.log + 127.0.0.1 - - [*] "GET /?f=a23bf1310f6e;file=sub/some%20%22text%22.txt;style=raw HTTP/1.1" 200 - (glob) + + $ rm access.log error.log + $ hg serve -p $HGPORT -A access.log -E error.log -d --pid-file=hg.pid \ + > --config web.guessmime=True + + $ cat hg.pid >> $DAEMON_PIDS + $ ("$TESTDIR/get-with-headers.py" localhost:$HGPORT '/?f=a23bf1310f6e;file=sub/some%20%22text%22.txt;style=raw' content-type content-length content-disposition) >getoutput.txt & + $ sleep 5 + $ kill `cat hg.pid` + $ sleep 1 # wait for server to scream and die + $ cat getoutput.txt + 200 Script output follows content-type: text/plain; charset="ascii" content-length: 157 content-disposition: inline; filename="some \"text\".txt"