From 3deb45e852b2dac658d748021696e0e84a317967 2011-12-16 00:17:41 From: Fernando Perez Date: 2011-12-16 00:17:41 Subject: [PATCH] Merge pull request #1161 from ipython/loadpy-fixes Allow %loadpy to load remote URLs that don't end in .py. Also strip encoding declarations from loaded files, which are invalid for interactively defined strings. --- diff --git a/IPython/core/magic.py b/IPython/core/magic.py index ee84680..26c269c 100644 --- a/IPython/core/magic.py +++ b/IPython/core/magic.py @@ -96,6 +96,9 @@ def needs_local_scope(func): # Used for exception handling in magic_edit class MacroToEdit(ValueError): pass +# Taken from PEP 263, this is the official encoding regexp. +_encoding_declaration_re = re.compile(r"^#.*coding[:=]\s*([-\w.]+)") + #*************************************************************************** # Main class implementing Magic functionality @@ -2151,16 +2154,33 @@ Currently the magic system has the following functions:\n""" %loadpy http://www.example.com/myscript.py """ arg_s = unquote_filename(arg_s) - if not arg_s.endswith('.py'): + remote_url = arg_s.startswith(('http://', 'https://')) + local_url = not remote_url + if local_url and not arg_s.endswith('.py'): + # Local files must be .py; for remote URLs it's possible that the + # fetch URL doesn't have a .py in it (many servers have an opaque + # URL, such as scipy-central.org). raise ValueError('%%load only works with .py files: %s' % arg_s) - if arg_s.startswith('http'): + if remote_url: import urllib2 - response = urllib2.urlopen(arg_s) - content = response.read() + fileobj = urllib2.urlopen(arg_s) + # While responses have a .info().getencoding() way of asking for + # their encoding, in *many* cases the return value is bogus. In + # the wild, servers serving utf-8 but declaring latin-1 are + # extremely common, as the old HTTP standards specify latin-1 as + # the default but many modern filesystems use utf-8. So we can NOT + # rely on the headers. Short of building complex encoding-guessing + # logic, going with utf-8 is a simple solution likely to be right + # in most real-world cases. + linesource = fileobj.read().decode('utf-8', 'replace').splitlines() else: - with open(arg_s) as f: - content = f.read() - self.set_next_input(content) + fileobj = linesource = open(arg_s) + + # Strip out encoding declarations + lines = [l for l in linesource if not _encoding_declaration_re.match(l)] + fileobj.close() + + self.set_next_input(os.linesep.join(lines)) def _find_edit_target(self, args, opts, last_call): """Utility method used by magic_edit to find what to edit."""