From dad1e44e4e7b65f89ad938042b4e5b08509188ad 2011-03-24 22:43:05
From: Thomas Kluyver <takowl@gmail.com>
Date: 2011-03-24 22:43:05
Subject: [PATCH] Fix up so tests pass again. input_splitter now uses ast module instead of compiler, bringing it closer to the Python 3 implementation.

---

diff --git a/IPython/core/inputsplitter.py b/IPython/core/inputsplitter.py
index 670f468..07fb33e 100644
--- a/IPython/core/inputsplitter.py
+++ b/IPython/core/inputsplitter.py
@@ -66,6 +66,7 @@ from __future__ import print_function
 # Imports
 #-----------------------------------------------------------------------------
 # stdlib
+import ast
 import codeop
 import re
 import sys
@@ -185,9 +186,6 @@ def split_blocks(python):
     commands : list of str
         Separate commands that can be exec'ed independently.
     """
-
-    import compiler
-    
     # compiler.parse treats trailing spaces after a newline as a
     # SyntaxError.  This is different than codeop.CommandCompiler, which
-    # will compile the trailng spaces just fine.  We simply strip any
+    # will compile the trailing spaces just fine.  We simply strip any
@@ -197,22 +195,15 @@ def split_blocks(python):
     python_ori = python # save original in case we bail on error
     python = python.strip()
 
-    # The compiler module does not like unicode. We need to convert
-    # it encode it:
-    if isinstance(python, unicode):
-        # Use the utf-8-sig BOM so the compiler detects this a UTF-8
-        # encode string.
-        python = '\xef\xbb\xbf' + python.encode('utf-8')
-
     # The compiler module will parse the code into an abstract syntax tree.
     # This has a bug with str("a\nb"), but not str("""a\nb""")!!!
     try:
-        ast = compiler.parse(python)
+        code_ast = ast.parse(python)
     except:
         return [python_ori]
 
     # Uncomment to help debug the ast tree
-    # for n in ast.node:
+    # for n in code_ast.body:
     #     print n.lineno,'->',n
 
-    # Each separate command is available by iterating over ast.node. The
+    # Each separate command is available by iterating over code_ast.body. The
@@ -223,14 +214,7 @@ def split_blocks(python):
     # other situations that cause Discard nodes that shouldn't be discarded.
     # We might eventually discover other cases where lineno is None and have
     # to put in a more sophisticated test.
-    linenos = [x.lineno-1 for x in ast.node if x.lineno is not None]
-
-    # When we have a bare string as the first statement, it does not end up as
-    # a Discard Node in the AST as we might expect. Instead, it gets interpreted
-    # as the docstring of the module. Check for this case and prepend 0 (the
-    # first line number) to the list of linenos to account for it.
-    if ast.doc is not None:
-        linenos.insert(0, 0)
+    linenos = [x.lineno-1 for x in code_ast.body if x.lineno is not None]
 
     # When we finally get the slices, we will need to slice all the way to
     # the end even though we don't have a line number for it. Fortunately,
@@ -616,7 +600,7 @@ class InputSplitter(object):
         setattr(self, store, self._set_source(buffer))
 
     def _set_source(self, buffer):
-        return ''.join(buffer).encode(self.encoding)
+        return ''.join(buffer)
 
 
 #-----------------------------------------------------------------------------
diff --git a/IPython/core/interactiveshell.py b/IPython/core/interactiveshell.py
index e2e7ebe..f4756bc 100644
--- a/IPython/core/interactiveshell.py
+++ b/IPython/core/interactiveshell.py
@@ -1550,12 +1550,14 @@ class InteractiveShell(Configurable, Magic):
             # otherwise we end up with a monster history after a while:
             readline.set_history_length(self.history_length)
             
+            stdin_encoding = sys.stdin.encoding or "utf-8"
+            
             # Load the last 1000 lines from history
             for _, _, cell in self.history_manager.get_tail(1000,
                                                 include_latest=True):
                 if cell.strip(): # Ignore blank lines
                     for line in cell.splitlines():
-                        readline.add_history(line)
+                        readline.add_history(line.encode(stdin_encoding))
 
         # Configure auto-indent for all platforms
         self.set_autoindent(self.autoindent)
@@ -2105,7 +2107,6 @@ class InteractiveShell(Configurable, Magic):
         if len(cell.splitlines()) <= 1:
             cell = self.prefilter_manager.prefilter_line(blocks[0])
             blocks = self.input_splitter.split_blocks(cell)
-        
 
         # Store the 'ipython' version of the cell as well, since that's what
         # needs to go into the translated history and get executed (the
@@ -2246,7 +2247,7 @@ class InteractiveShell(Configurable, Magic):
         else:
             usource = source
 
-        if 0:  # dbg
+        if False:  # dbg
             print 'Source:', repr(source)  # dbg
             print 'USource:', repr(usource)  # dbg
             print 'type:', type(source) # dbg
diff --git a/IPython/core/magic.py b/IPython/core/magic.py
index e01281c..afcced7 100644
--- a/IPython/core/magic.py
+++ b/IPython/core/magic.py
@@ -2063,7 +2063,8 @@ Currently the magic system has the following functions:\n"""
                 return
         cmds = self.extract_input_lines(ranges, 'r' in opts)
         with open(fname,'w') as f:
-            f.write(cmds)
+            f.write("# coding: utf-8\n")
+            f.write(cmds.encode("utf-8"))
         print 'The following commands were written to file `%s`:' % fname
         print cmds
 
diff --git a/IPython/core/tests/test_history.py b/IPython/core/tests/test_history.py
index 55e3252..db82828 100644
--- a/IPython/core/tests/test_history.py
+++ b/IPython/core/tests/test_history.py
@@ -83,7 +83,8 @@ def test_history():
             testfilename = os.path.realpath(os.path.join(tmpdir, "test.py"))
             ip.magic_save(testfilename + " ~1/1-3")
             testfile = open(testfilename, "r")
-            nt.assert_equal(testfile.read(), "\n".join(hist))
+            nt.assert_equal(testfile.read().decode("utf-8"),
+                    "# coding: utf-8\n" + "\n".join(hist))
             
             # Duplicate line numbers - check that it doesn't crash, and
             # gets a new session