# HG changeset patch # User Gregory Szorc # Date 2019-11-02 19:09:35 # Node ID 579672b347d2169a4abfb372873972a0fd6a839c # Parent bb509f39d387ae18214115e341a13417c197dd75 py3: define and use json.loads polyfill Python 3.5's json.loads() requires a str. Only Python 3.6+ supports passing a bytes or bytearray. This commit implements a json.loads() polyfill on Python 3.5 so that we can use bytes. The added function to detect encodings comes verbatim from Python 3.7. diff --git a/hgext/bugzilla.py b/hgext/bugzilla.py --- a/hgext/bugzilla.py +++ b/hgext/bugzilla.py @@ -955,7 +955,7 @@ class bzrestapi(bzaccess): def _fetch(self, burl): try: resp = url.open(self.ui, burl) - return json.loads(resp.read()) + return pycompat.json_loads(resp.read()) except util.urlerr.httperror as inst: if inst.code == 401: raise error.Abort(_(b'authorization failed')) @@ -978,7 +978,7 @@ class bzrestapi(bzaccess): req = request_type(burl, data, {b'Content-Type': b'application/json'}) try: resp = url.opener(self.ui).open(req) - return json.loads(resp.read()) + return pycompat.json_loads(resp.read()) except util.urlerr.httperror as inst: if inst.code == 401: raise error.Abort(_(b'authorization failed')) diff --git a/hgext/fix.py b/hgext/fix.py --- a/hgext/fix.py +++ b/hgext/fix.py @@ -126,7 +126,6 @@ from __future__ import absolute_import import collections import itertools -import json import os import re import subprocess @@ -642,7 +641,7 @@ def fixfile(ui, repo, opts, fixers, fixc if fixer.shouldoutputmetadata(): try: metadatajson, newerdata = stdout.split(b'\0', 1) - metadata[fixername] = json.loads(metadatajson) + metadata[fixername] = pycompat.json_loads(metadatajson) except ValueError: ui.warn( _(b'ignored invalid output from fixer tool: %s\n') diff --git a/hgext/lfs/blobstore.py b/hgext/lfs/blobstore.py --- a/hgext/lfs/blobstore.py +++ b/hgext/lfs/blobstore.py @@ -363,7 +363,7 @@ class _gitlfsremote(object): _(b'LFS error: %s') % _urlerrorreason(ex), hint=hint ) try: - response = json.loads(rawjson) + response = pycompat.json_loads(rawjson) except ValueError: raise LfsRemoteError( _(b'LFS server returns invalid JSON: %s') diff --git a/hgext/lfs/wireprotolfsserver.py b/hgext/lfs/wireprotolfsserver.py --- a/hgext/lfs/wireprotolfsserver.py +++ b/hgext/lfs/wireprotolfsserver.py @@ -133,7 +133,7 @@ def _processbatchrequest(repo, req, res) return True # XXX: specify an encoding? - lfsreq = json.loads(req.bodyfh.read()) + lfsreq = pycompat.json_loads(req.bodyfh.read()) # If no transfer handlers are explicitly requested, 'basic' is assumed. if r'basic' not in lfsreq.get(r'transfers', [r'basic']): diff --git a/hgext/phabricator.py b/hgext/phabricator.py --- a/hgext/phabricator.py +++ b/hgext/phabricator.py @@ -152,8 +152,8 @@ def vcrcommand(name, flags, spec, helpca value = r1params[key][0] # we want to compare json payloads without worrying about ordering if value.startswith(b'{') and value.endswith(b'}'): - r1json = json.loads(value) - r2json = json.loads(r2params[key][0]) + r1json = pycompat.json_loads(value) + r2json = pycompat.json_loads(r2params[key][0]) if r1json != r2json: return False elif r2params[key][0] != value: @@ -307,7 +307,7 @@ def callconduit(ui, name, params): if isinstance(x, pycompat.unicode) else x, # json.loads only accepts bytes from py3.6+ - json.loads(encoding.unifromlocal(body)), + pycompat.json_loads(encoding.unifromlocal(body)), ) if parsed.get(b'error_code'): msg = _(b'Conduit Error (%s): %s') % ( @@ -332,7 +332,7 @@ def debugcallconduit(ui, repo, name): lambda x: encoding.unitolocal(x) if isinstance(x, pycompat.unicode) else x, - json.loads(rawparams), + pycompat.json_loads(rawparams), ) # json.dumps only accepts unicode strings result = pycompat.rapply( diff --git a/mercurial/pycompat.py b/mercurial/pycompat.py --- a/mercurial/pycompat.py +++ b/mercurial/pycompat.py @@ -12,6 +12,7 @@ from __future__ import absolute_import import getopt import inspect +import json import os import shlex import sys @@ -88,6 +89,7 @@ def rapply(f, xs): if ispy3: import builtins + import codecs import functools import io import struct @@ -340,6 +342,48 @@ if ispy3: iteritems = lambda x: x.items() itervalues = lambda x: x.values() + # Python 3.5's json.load and json.loads require str. We polyfill its + # code for detecting encoding from bytes. + if sys.version_info[0:2] < (3, 6): + + def _detect_encoding(b): + bstartswith = b.startswith + if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)): + return 'utf-32' + if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)): + return 'utf-16' + if bstartswith(codecs.BOM_UTF8): + return 'utf-8-sig' + + if len(b) >= 4: + if not b[0]: + # 00 00 -- -- - utf-32-be + # 00 XX -- -- - utf-16-be + return 'utf-16-be' if b[1] else 'utf-32-be' + if not b[1]: + # XX 00 00 00 - utf-32-le + # XX 00 00 XX - utf-16-le + # XX 00 XX -- - utf-16-le + return 'utf-16-le' if b[2] or b[3] else 'utf-32-le' + elif len(b) == 2: + if not b[0]: + # 00 XX - utf-16-be + return 'utf-16-be' + if not b[1]: + # XX 00 - utf-16-le + return 'utf-16-le' + # default + return 'utf-8' + + def json_loads(s, *args, **kwargs): + if isinstance(s, (bytes, bytearray)): + s = s.decode(_detect_encoding(s), 'surrogatepass') + + return json.loads(s, *args, **kwargs) + + else: + json_loads = json.loads + else: import cStringIO @@ -417,6 +461,7 @@ else: getargspec = inspect.getargspec iteritems = lambda x: x.iteritems() itervalues = lambda x: x.itervalues() + json_loads = json.loads isjython = sysplatform.startswith(b'java') diff --git a/tests/get-with-headers.py b/tests/get-with-headers.py --- a/tests/get-with-headers.py +++ b/tests/get-with-headers.py @@ -98,7 +98,7 @@ def request(host, path, show): if formatjson: # json.dumps() will print trailing newlines. Eliminate them # to make tests easier to write. - data = json.loads(data) + data = pycompat.json_loads(data) lines = json.dumps(data, sort_keys=True, indent=2).splitlines() for line in lines: bodyfh.write(pycompat.sysbytes(line.rstrip()))