# HG changeset patch # User Gregory Szorc # Date 2017-03-13 19:16:47 # Node ID fb1f70331ee663f29db0501d8dada622175becd5 # Parent 1ed169c5e235b66aaa7dba8cbb5e3df5c2ca808f pycompat: custom implementation of urllib.parse.quote() urllib.parse.quote() accepts either str or bytes and returns str. There exists a urllib.parse.quote_from_bytes() which only accepts bytes. We should probably use that to retain strong typing and avoid surprises. In addition, since nearly all strings in Mercurial are bytes, we probably don't want quote() returning unicode. So, this patch implements a custom quote() that only accepts bytes and returns bytes. The quoted URL should only contain URL-safe characters, which are a strict subset of ASCII. So `.encode('ascii', 'strict')` should be safe. diff --git a/mercurial/pycompat.py b/mercurial/pycompat.py --- a/mercurial/pycompat.py +++ b/mercurial/pycompat.py @@ -269,7 +269,6 @@ if not ispy3: else: import urllib.parse urlreq._registeraliases(urllib.parse, ( - "quote", "splitattr", "splitpasswd", "splitport", @@ -313,3 +312,12 @@ else: "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler", )) + + # urllib.parse.quote() accepts both str and bytes, decodes bytes + # (if necessary), and returns str. This is wonky. We provide a custom + # implementation that only accepts bytes and emits bytes. + def quote(s, safe=r'/'): + s = urllib.parse.quote_from_bytes(s, safe=safe) + return s.encode('ascii', 'strict') + + urlreq.quote = quote