From a6a9d5803e479de956d9003cd293b91da5d6a8a4 2011-03-24 23:54:23 From: Thomas Kluyver Date: 2011-03-24 23:54:23 Subject: [PATCH] Merge branch 'unicode-issues' --- diff --git a/IPython/config/loader.py b/IPython/config/loader.py index 296582c..6a1205a 100644 --- a/IPython/config/loader.py +++ b/IPython/config/loader.py @@ -285,7 +285,9 @@ class PyFileConfigLoader(FileConfigLoader): return self.config namespace = dict(load_subconfig=load_subconfig, get_config=get_config) - execfile(self.full_filename, namespace) + fs_encoding = sys.getfilesystemencoding() or 'ascii' + conf_filename = self.full_filename.encode(fs_encoding) + execfile(conf_filename, namespace) def _convert_to_config(self): if self.data is None: diff --git a/IPython/core/application.py b/IPython/core/application.py index 915cf16..fc600d2 100644 --- a/IPython/core/application.py +++ b/IPython/core/application.py @@ -353,18 +353,22 @@ class Application(object): # our shipped copies of builtin profiles even if they don't have them # in their local ipython directory. prof_dir = os.path.join(get_ipython_package_dir(), 'config', 'profile') - self.config_file_paths = (os.getcwd(), self.ipython_dir, prof_dir) + self.config_file_paths = (os.getcwdu(), self.ipython_dir, prof_dir) def pre_load_file_config(self): """Do actions before the config file is loaded.""" pass - def load_file_config(self): + def load_file_config(self, suppress_errors=True): """Load the config file. This tries to load the config file from disk. If successful, the ``CONFIG_FILE`` config variable is set to the resolved config file location. If not successful, an empty config is used. + + By default, errors in loading config are handled, and a warning + printed on screen. For testing, the suppress_errors option is set + to False, so errors will make tests fail. """ self.log.debug("Attempting to load config file: %s" % self.config_file_name) @@ -380,6 +384,8 @@ class Application(object): self.config_file_name, exc_info=True) self.file_config = Config() except: + if not suppress_errors: # For testing purposes + raise self.log.warn("Error loading config file: %s" % self.config_file_name, exc_info=True) self.file_config = Config() diff --git a/IPython/core/compilerop.py b/IPython/core/compilerop.py index 29c9f68..e92be98 100644 --- a/IPython/core/compilerop.py +++ b/IPython/core/compilerop.py @@ -38,8 +38,10 @@ import time def code_name(code, number=0): """ Compute a (probably) unique name for code for caching. + + This now expects code to be unicode. """ - hash_digest = hashlib.md5(code).hexdigest() + hash_digest = hashlib.md5(code.encode("utf-8")).hexdigest() # Include the number and 12 characters of the hash in the name. It's # pretty much impossible that in a single session we'll have collisions # even with truncated hashes, and the full one makes tracebacks too long diff --git a/IPython/core/inputsplitter.py b/IPython/core/inputsplitter.py index c54f922..915fd8c 100644 --- a/IPython/core/inputsplitter.py +++ b/IPython/core/inputsplitter.py @@ -66,6 +66,7 @@ from __future__ import print_function # Imports #----------------------------------------------------------------------------- # stdlib +import ast import codeop import re import sys @@ -185,9 +186,6 @@ def split_blocks(python): commands : list of str Separate commands that can be exec'ed independently. """ - - import compiler - # compiler.parse treats trailing spaces after a newline as a # SyntaxError. This is different than codeop.CommandCompiler, which # will compile the trailng spaces just fine. We simply strip any @@ -197,22 +195,15 @@ def split_blocks(python): python_ori = python # save original in case we bail on error python = python.strip() - # The compiler module does not like unicode. We need to convert - # it encode it: - if isinstance(python, unicode): - # Use the utf-8-sig BOM so the compiler detects this a UTF-8 - # encode string. - python = '\xef\xbb\xbf' + python.encode('utf-8') - # The compiler module will parse the code into an abstract syntax tree. # This has a bug with str("a\nb"), but not str("""a\nb""")!!! try: - ast = compiler.parse(python) + code_ast = ast.parse(python) except: return [python_ori] # Uncomment to help debug the ast tree - # for n in ast.node: + # for n in code_ast.body: # print n.lineno,'->',n # Each separate command is available by iterating over ast.node. The @@ -223,14 +214,7 @@ def split_blocks(python): # other situations that cause Discard nodes that shouldn't be discarded. # We might eventually discover other cases where lineno is None and have # to put in a more sophisticated test. - linenos = [x.lineno-1 for x in ast.node if x.lineno is not None] - - # When we have a bare string as the first statement, it does not end up as - # a Discard Node in the AST as we might expect. Instead, it gets interpreted - # as the docstring of the module. Check for this case and prepend 0 (the - # first line number) to the list of linenos to account for it. - if ast.doc is not None: - linenos.insert(0, 0) + linenos = [x.lineno-1 for x in code_ast.body if x.lineno is not None] # When we finally get the slices, we will need to slice all the way to # the end even though we don't have a line number for it. Fortunately, @@ -603,7 +587,7 @@ class InputSplitter(object): If input lines are not newline-terminated, a newline is automatically appended.""" - + if buffer is None: buffer = self._buffer @@ -614,7 +598,7 @@ class InputSplitter(object): setattr(self, store, self._set_source(buffer)) def _set_source(self, buffer): - return ''.join(buffer).encode(self.encoding) + return u''.join(buffer) #----------------------------------------------------------------------------- diff --git a/IPython/core/interactiveshell.py b/IPython/core/interactiveshell.py index e2e7ebe..f4756bc 100644 --- a/IPython/core/interactiveshell.py +++ b/IPython/core/interactiveshell.py @@ -1550,12 +1550,14 @@ class InteractiveShell(Configurable, Magic): # otherwise we end up with a monster history after a while: readline.set_history_length(self.history_length) + stdin_encoding = sys.stdin.encoding or "utf-8" + # Load the last 1000 lines from history for _, _, cell in self.history_manager.get_tail(1000, include_latest=True): if cell.strip(): # Ignore blank lines for line in cell.splitlines(): - readline.add_history(line) + readline.add_history(line.encode(stdin_encoding)) # Configure auto-indent for all platforms self.set_autoindent(self.autoindent) @@ -2105,7 +2107,6 @@ class InteractiveShell(Configurable, Magic): if len(cell.splitlines()) <= 1: cell = self.prefilter_manager.prefilter_line(blocks[0]) blocks = self.input_splitter.split_blocks(cell) - # Store the 'ipython' version of the cell as well, since that's what # needs to go into the translated history and get executed (the @@ -2246,7 +2247,7 @@ class InteractiveShell(Configurable, Magic): else: usource = source - if 0: # dbg + if False: # dbg print 'Source:', repr(source) # dbg print 'USource:', repr(usource) # dbg print 'type:', type(source) # dbg diff --git a/IPython/core/magic.py b/IPython/core/magic.py index e01281c..afcced7 100644 --- a/IPython/core/magic.py +++ b/IPython/core/magic.py @@ -2063,7 +2063,8 @@ Currently the magic system has the following functions:\n""" return cmds = self.extract_input_lines(ranges, 'r' in opts) with open(fname,'w') as f: - f.write(cmds) + f.write("# coding: utf-8\n") + f.write(cmds.encode("utf-8")) print 'The following commands were written to file `%s`:' % fname print cmds diff --git a/IPython/core/tests/test_application.py b/IPython/core/tests/test_application.py new file mode 100644 index 0000000..24e0f61 --- /dev/null +++ b/IPython/core/tests/test_application.py @@ -0,0 +1,68 @@ +# coding: utf-8 +"""Tests for IPython.core.application""" + +import os +import tempfile + +from IPython.core.application import Application + +def test_unicode_cwd(): + """Check that IPython starts with non-ascii characters in the path.""" + wd = tempfile.mkdtemp(suffix=u"€") + + old_wd = os.getcwdu() + os.chdir(wd) + #raise Exception(repr(os.getcwd())) + try: + app = Application() + # The lines below are copied from Application.initialize() + app.create_default_config() + app.log_default_config() + app.set_default_config_log_level() + + # Find resources needed for filesystem access, using information from + # the above two + app.find_ipython_dir() + app.find_resources() + app.find_config_file_name() + app.find_config_file_paths() + + # File-based config + app.pre_load_file_config() + app.load_file_config(suppress_errors=False) + finally: + os.chdir(old_wd) + +def test_unicode_ipdir(): + """Check that IPython starts with non-ascii characters in the IP dir.""" + ipdir = tempfile.mkdtemp(suffix=u"€") + + # Create the config file, so it tries to load it. + with open(os.path.join(ipdir, 'ipython_config.py'), "w") as f: + pass + + old_ipdir1 = os.environ.pop("IPYTHONDIR", None) + old_ipdir2 = os.environ.pop("IPYTHON_DIR", None) + os.environ["IPYTHONDIR"] = ipdir.encode("utf-8") + try: + app = Application() + # The lines below are copied from Application.initialize() + app.create_default_config() + app.log_default_config() + app.set_default_config_log_level() + + # Find resources needed for filesystem access, using information from + # the above two + app.find_ipython_dir() + app.find_resources() + app.find_config_file_name() + app.find_config_file_paths() + + # File-based config + app.pre_load_file_config() + app.load_file_config(suppress_errors=False) + finally: + if old_ipdir1: + os.environ["IPYTHONDIR"] = old_ipdir1 + if old_ipdir2: + os.environ["IPYTHONDIR"] = old_ipdir2 diff --git a/IPython/core/tests/test_compilerop.py b/IPython/core/tests/test_compilerop.py index 3d133ae..2fac2fc 100644 --- a/IPython/core/tests/test_compilerop.py +++ b/IPython/core/tests/test_compilerop.py @@ -1,3 +1,4 @@ +# coding: utf-8 """Tests for the compilerop module. """ #----------------------------------------------------------------------------- @@ -15,6 +16,7 @@ from __future__ import print_function # Stdlib imports import linecache +import sys # Third-party imports import nose.tools as nt @@ -46,6 +48,16 @@ def test_compiler(): cp('x=1', 'single') nt.assert_true(len(linecache.cache) > ncache) +def setUp(): + # Check we're in a proper Python 2 environment (some imports, such + # as GTK, can change the default encoding, which can hide bugs.) + nt.assert_equal(sys.getdefaultencoding(), "ascii") + +def test_compiler_unicode(): + cp = compilerop.CachingCompiler() + ncache = len(linecache.cache) + cp(u"t = 'žćčšđ'", "single") + nt.assert_true(len(linecache.cache) > ncache) def test_compiler_check_cache(): """Test the compiler properly manages the cache. diff --git a/IPython/core/tests/test_history.py b/IPython/core/tests/test_history.py index dc440f5..a2d3353 100644 --- a/IPython/core/tests/test_history.py +++ b/IPython/core/tests/test_history.py @@ -1,3 +1,4 @@ +# coding: utf-8 """Tests for the IPython tab-completion machinery. """ #----------------------------------------------------------------------------- @@ -16,8 +17,10 @@ import nose.tools as nt from IPython.utils.tempdir import TemporaryDirectory from IPython.core.history import HistoryManager, extract_hist_ranges -def test_history(): +def setUp(): + nt.assert_equal(sys.getdefaultencoding(), "ascii") +def test_history(): ip = get_ipython() with TemporaryDirectory() as tmpdir: #tmpdir = '/software/temp' @@ -32,7 +35,7 @@ def test_history(): ip.history_manager.init_db() # Has to be called after changing file ip.history_manager.reset() print 'test',histfile - hist = ['a=1', 'def f():\n test = 1\n return test', 'b=2'] + hist = ['a=1', 'def f():\n test = 1\n return test', u"b='€Æ¾÷ß'"] for i, h in enumerate(hist, start=1): ip.history_manager.store_inputs(i, h) @@ -82,7 +85,8 @@ def test_history(): testfilename = os.path.realpath(os.path.join(tmpdir, "test.py")) ip.magic_save(testfilename + " ~1/1-3") testfile = open(testfilename, "r") - nt.assert_equal(testfile.read(), "\n".join(hist)) + nt.assert_equal(testfile.read().decode("utf-8"), + "# coding: utf-8\n" + "\n".join(hist)) # Duplicate line numbers - check that it doesn't crash, and # gets a new session @@ -92,6 +96,7 @@ def test_history(): # Restore history manager ip.history_manager = hist_manager_ori + def test_extract_hist_ranges(): instr = "1 2/3 ~4/5-6 ~4/7-~4/9 ~9/2-~7/5" expected = [(0, 1, 2), # 0 == current session diff --git a/IPython/core/tests/test_inputsplitter.py b/IPython/core/tests/test_inputsplitter.py index 5b2cb9c..4cf588b 100644 --- a/IPython/core/tests/test_inputsplitter.py +++ b/IPython/core/tests/test_inputsplitter.py @@ -364,7 +364,7 @@ class InputSplitterTestCase(unittest.TestCase): def test_unicode(self): self.isp.push(u"Pérez") self.isp.push(u'\xc3\xa9') - self.isp.push("u'\xc3\xa9'") + self.isp.push(u"u'\xc3\xa9'") class InteractiveLoopTestCase(unittest.TestCase): """Tests for an interactive loop like a python shell. diff --git a/IPython/core/tests/test_magic.py b/IPython/core/tests/test_magic.py index 3444600..25ed013 100644 --- a/IPython/core/tests/test_magic.py +++ b/IPython/core/tests/test_magic.py @@ -293,9 +293,9 @@ def test_parse_options(): def test_dirops(): """Test various directory handling operations.""" - curpath = lambda :os.path.splitdrive(os.getcwd())[1].replace('\\','/') + curpath = lambda :os.path.splitdrive(os.getcwdu())[1].replace('\\','/') - startdir = os.getcwd() + startdir = os.getcwdu() ipdir = _ip.ipython_dir try: _ip.magic('cd "%s"' % ipdir) diff --git a/IPython/testing/iptest.py b/IPython/testing/iptest.py index cfccb8e..fce2111 100644 --- a/IPython/testing/iptest.py +++ b/IPython/testing/iptest.py @@ -105,8 +105,6 @@ have['zope.interface'] = test_for('zope.interface') have['twisted'] = test_for('twisted') have['foolscap'] = test_for('foolscap') have['pexpect'] = test_for('pexpect') -have['gtk'] = test_for('gtk') -have['gobject'] = test_for('gobject') #----------------------------------------------------------------------------- # Functions and classes @@ -170,9 +168,10 @@ def make_exclude(): if not have['wx']: exclusions.append(ipjoin('lib', 'inputhookwx')) - - if not have['gtk'] or not have['gobject']: - exclusions.append(ipjoin('lib', 'inputhookgtk')) + + # We do this unconditionally, so that the test suite doesn't import + # gtk, changing the default encoding and masking some unicode bugs. + exclusions.append(ipjoin('lib', 'inputhookgtk')) # These have to be skipped on win32 because the use echo, rm, cd, etc. # See ticket https://bugs.launchpad.net/bugs/366982