From ede79361b5507abb185faa8359d65b9f45f9687e 2011-09-12 23:47:16 From: Fernando Perez Date: 2011-09-12 23:47:16 Subject: [PATCH] Merge pull request #770 from minrk/jsonclean Ensures all replies from ipkernel are clean for json (not just oinfo), and guesses stdin.encoding before using sys.getdefaultencoding in json_clean. --- diff --git a/IPython/config/loader.py b/IPython/config/loader.py index 3cbd132..4786e35 100644 --- a/IPython/config/loader.py +++ b/IPython/config/loader.py @@ -24,7 +24,7 @@ import sys from IPython.external import argparse from IPython.utils.path import filefind, get_ipython_dir -from IPython.utils import py3compat, warn +from IPython.utils import py3compat, text, warn #----------------------------------------------------------------------------- # Exceptions @@ -425,7 +425,7 @@ class KeyValueConfigLoader(CommandLineConfigLoader): """decode argv if bytes, using stin.encoding, falling back on default enc""" uargv = [] if enc is None: - enc = sys.stdin.encoding or sys.getdefaultencoding() + enc = text.getdefaultencoding() for arg in argv: if not isinstance(arg, unicode): # only decode if not already decoded @@ -586,7 +586,8 @@ class ArgParseConfigLoader(CommandLineConfigLoader): def _parse_args(self, args): """self.parser->self.parsed_data""" # decode sys.argv to support unicode command-line options - uargs = [py3compat.cast_unicode(a) for a in args] + enc = text.getdefaultencoding() + uargs = [py3compat.cast_unicode(a, enc) for a in args] self.parsed_data, self.extra_args = self.parser.parse_known_args(uargs) def _convert_to_config(self): diff --git a/IPython/utils/_process_win32.py b/IPython/utils/_process_win32.py index 3a273c0..f1dd502 100644 --- a/IPython/utils/_process_win32.py +++ b/IPython/utils/_process_win32.py @@ -23,6 +23,7 @@ from subprocess import STDOUT # our own imports from ._process_common import read_no_interrupt, process_handler +from . import text #----------------------------------------------------------------------------- # Function definitions @@ -88,7 +89,7 @@ def _find_cmd(cmd): def _system_body(p): """Callback for _system.""" - enc = sys.stdin.encoding or sys.getdefaultencoding() + enc = text.getdefaultencoding() for line in read_no_interrupt(p.stdout).splitlines(): line = line.decode(enc, 'replace') print(line, file=sys.stdout) diff --git a/IPython/utils/jsonutil.py b/IPython/utils/jsonutil.py index 06ccb3d..52ac240 100644 --- a/IPython/utils/jsonutil.py +++ b/IPython/utils/jsonutil.py @@ -17,6 +17,7 @@ import types from datetime import datetime from IPython.utils import py3compat +from IPython.utils import text next_attr_name = '__next__' if py3compat.PY3 else 'next' #----------------------------------------------------------------------------- @@ -134,7 +135,7 @@ def json_clean(obj): return obj if isinstance(obj, bytes): - return obj.decode(sys.getdefaultencoding(), 'replace') + return obj.decode(text.getdefaultencoding(), 'replace') if isinstance(obj, container_to_list) or ( hasattr(obj, '__iter__') and hasattr(obj, next_attr_name)): diff --git a/IPython/utils/text.py b/IPython/utils/text.py index bf4a143..dfe9454 100644 --- a/IPython/utils/text.py +++ b/IPython/utils/text.py @@ -16,9 +16,11 @@ Utilities for working with strings and text. import __main__ +import locale import os import re import shutil +import sys import textwrap from string import Formatter @@ -31,6 +33,28 @@ from IPython.utils.data import flatten # Code #----------------------------------------------------------------------------- +# Less conservative replacement for sys.getdefaultencoding, that will try +# to match the environment. +# Defined here as central function, so if we find better choices, we +# won't need to make changes all over IPython. +def getdefaultencoding(): + """Return IPython's guess for the default encoding for bytes as text. + + Asks for stdin.encoding first, to match the calling Terminal, but that + is often None for subprocesses. Fall back on locale.getpreferredencoding() + which should be a sensible platform default (that respects LANG environment), + and finally to sys.getdefaultencoding() which is the most conservative option, + and usually ASCII. + """ + enc = sys.stdin.encoding + if not enc: + try: + # There are reports of getpreferredencoding raising errors + # in some cases, which may well be fixed, but let's be conservative here. + enc = locale.getpreferredencoding(False) + except Exception: + pass + return enc or sys.getdefaultencoding() def unquote_ends(istr): """Remove a single pair of quotes from the endpoints of a string.""" diff --git a/IPython/zmq/iostream.py b/IPython/zmq/iostream.py index 5fcfb98..8579398 100644 --- a/IPython/zmq/iostream.py +++ b/IPython/zmq/iostream.py @@ -4,7 +4,7 @@ from io import StringIO from session import extract_header, Message -from IPython.utils import io +from IPython.utils import io, text #----------------------------------------------------------------------------- # Globals @@ -69,7 +69,7 @@ class OutStream(object): else: # Make sure that we're handling unicode if not isinstance(string, unicode): - enc = sys.stdin.encoding or sys.getdefaultencoding() + enc = text.getdefaultencoding() string = string.decode(enc, 'replace') self._buffer.write(string) diff --git a/IPython/zmq/ipkernel.py b/IPython/zmq/ipkernel.py index 264d6b1..0df9093 100755 --- a/IPython/zmq/ipkernel.py +++ b/IPython/zmq/ipkernel.py @@ -303,6 +303,7 @@ class Kernel(Configurable): time.sleep(self._execute_sleep) # Send the reply. + reply_content = json_clean(reply_content) reply_msg = self.session.send(self.shell_socket, u'execute_reply', reply_content, parent, ident=ident) self.log.debug(str(reply_msg)) @@ -321,6 +322,7 @@ class Kernel(Configurable): matches = {'matches' : matches, 'matched_text' : txt, 'status' : 'ok'} + matches = json_clean(matches) completion_msg = self.session.send(self.shell_socket, 'complete_reply', matches, parent, ident) self.log.debug(str(completion_msg)) @@ -358,6 +360,7 @@ class Kernel(Configurable): else: hist = [] content = {'history' : list(hist)} + content = json_clean(content) msg = self.session.send(self.shell_socket, 'history_reply', content, parent, ident) self.log.debug(str(msg)) @@ -409,7 +412,7 @@ class Kernel(Configurable): sys.stdout.flush() # Send the input request. - content = dict(prompt=prompt) + content = json_clean(dict(prompt=prompt)) msg = self.session.send(self.stdin_socket, u'input_request', content, parent) # Await a response. diff --git a/IPython/zmq/zmqshell.py b/IPython/zmq/zmqshell.py index ac7df79..2bbf2a9 100644 --- a/IPython/zmq/zmqshell.py +++ b/IPython/zmq/zmqshell.py @@ -30,6 +30,7 @@ from IPython.core.macro import Macro from IPython.core.magic import MacroToEdit from IPython.core.payloadpage import install_payload_page from IPython.utils import io +from IPython.utils.jsonutil import json_clean from IPython.utils.path import get_py_filename from IPython.utils.traitlets import Instance, Type, Dict, CBool from IPython.utils.warn import warn @@ -69,7 +70,7 @@ class ZMQDisplayPublisher(DisplayPublisher): content['data'] = data content['metadata'] = metadata self.session.send( - self.pub_socket, u'display_data', content, + self.pub_socket, u'display_data', json_clean(content), parent=self.parent_header ) @@ -144,7 +145,7 @@ class ZMQInteractiveShell(InteractiveShell): dh = self.displayhook # Send exception info over pub socket for other clients than the caller # to pick up - exc_msg = dh.session.send(dh.pub_socket, u'pyerr', exc_content, dh.parent_header) + exc_msg = dh.session.send(dh.pub_socket, u'pyerr', json_clean(exc_content), dh.parent_header) # FIXME - Hack: store exception info in shell object. Right now, the # caller is reading this info after the fact, we need to fix this logic