upstream/ipython Files · IPython/nbformat/v3/nbpy.py

normalize unicode notebook filenames...

Normalize the unicode notebook filenames used in the comparison that checks for a notebook name change. Unless the filenames are normalized, an unchanged name may be falsely detected as a name change (e.g. OS X uses NFD on the filesystem, so u'\xfc' round-tripped through the filesystem comes back as u'u\u0308'). As a result, the first save of a notebook after opening it can perform the following actions: (1) save the recently opened notebook; (2) find that `old_name != new_name`, so a name change is detected; (3) delete old_name (which is actually new_name), which ultimately deletes the just-saved notebook. In master, the symptom is that the first checkpoint fails, because the first save actually deleted the file and a notebook that doesn't exist can't be checkpointed. Closes [issue reference missing from this page].

MinRK - - Load All Authors

File last commit:

r7566:5d4da6eb


                r10777:9585bda6

Download file

             nbpy.py
        
                    204 lines
            
             | 7.4 KiB
            
                | text/x-python
            
             |
                PythonLexer
            
             / IPython / nbformat / v3 / nbpy.py
          
                    History
                
                 |
                  Annotation
                 | Raw
                 |Copy content
                 |Copy permalink

      """Read and write notebooks as regular .py files.

      Authors:

      * Brian Granger

      """

      #-----------------------------------------------------------------------------

      #  Copyright (C) 2008-2011  The IPython Development Team

      #

      #  Distributed under the terms of the BSD License.  The full license is in

      #  the file COPYING, distributed as part of this software.

      #-----------------------------------------------------------------------------

      #-----------------------------------------------------------------------------

      # Imports

      #-----------------------------------------------------------------------------

      import re

      from .rwbase import NotebookReader, NotebookWriter

      from .nbbase import (

          new_code_cell, new_text_cell, new_worksheet,

          new_notebook, new_heading_cell, nbformat, nbformat_minor,

      )

      #-----------------------------------------------------------------------------

      # Code

      #-----------------------------------------------------------------------------

      _encoding_declaration_re = re.compile(r"^#.*coding[:=]\s*([-\w.]+)")

      class PyReaderError(Exception):
          """Exception type declared for errors in the ``.py`` notebook reader."""

      class PyReader(NotebookReader):
          """Build a notebook from ``.py`` source annotated with cell markers.

          Cell boundaries are comment lines such as ``# <codecell>`` or
          ``# <markdowncell>``; the lines between two markers form one cell.
          """

          # Marker prefix -> parser state, for markers that carry no arguments.
          _MARKER_STATES = (
              (u'# <codecell>', u'codecell'),
              (u'# <htmlcell>', u'htmlcell'),
              (u'# <markdowncell>', u'markdowncell'),
              (u'# <rawcell>', u'rawcell'),
              # VERSIONHACK: plaintext -> raw
              (u'# <plaintextcell>', u'rawcell'),
          )

          # Parser state -> cell type handed to ``new_text_cell``.
          _TEXT_CELL_TYPES = {
              u'htmlcell': u'html',
              u'markdowncell': u'markdown',
              u'rawcell': u'raw',
          }

          def reads(self, s, **kwargs):
              """Return the notebook parsed from the string ``s``."""
              return self.to_notebook(s, **kwargs)

          def to_notebook(self, s, **kwargs):
              """Parse ``s`` into a notebook holding a single worksheet.

              Lines are gathered into the current cell until the next cell
              marker; text before the first marker is treated as code.
              ``# <nbformat>`` lines are skipped, as is an encoding declaration
              on the first two lines.  ``kwargs`` is accepted for interface
              compatibility and is not used.
              """
              cells = []
              pending = []        # lines gathered for the cell being read
              cell_kwargs = {}    # extra arguments for that cell (heading level)
              state = u'codecell'

              def flush():
                  # Turn the gathered lines into a cell; new_cell returns None
                  # for a cell without content, which is then dropped.
                  cell = self.new_cell(state, pending, **cell_kwargs)
                  if cell is not None:
                      cells.append(cell)

              for lineno, line in enumerate(s.splitlines()):
                  if line.startswith(u'# <nbformat>'):
                      continue
                  # An encoding declaration only counts on the first two lines
                  # (PEP 263); a later look-alike comment is cell content and
                  # must not be silently dropped.
                  if lineno < 2 and _encoding_declaration_re.match(line):
                      continue
                  new_state, new_kwargs = self._parse_marker(line)
                  if new_state is None:
                      pending.append(line)
                      continue
                  flush()
                  state, cell_kwargs, pending = new_state, new_kwargs, []

              # Flush the cell still open at end of input, whatever its type,
              # so that a trailing text or heading cell is not lost.
              flush()

              ws = new_worksheet(cells=cells)
              return new_notebook(worksheets=[ws])

          def _parse_marker(self, line):
              """Classify ``line`` as a cell marker.

              Returns ``(state, cell_kwargs)`` for a marker line and
              ``(None, None)`` for ordinary content.
              """
              for prefix, state in self._MARKER_STATES:
                  if line.startswith(prefix):
                      return state, {}
              if line.startswith(u'# <headingcell'):
                  m = re.match(r'# <headingcell level=(?P<level>\d)>', line)
                  if m is not None:
                      return u'headingcell', {'level': int(m.group('level'))}
                  # A heading marker without a parsable level starts a code cell.
                  return u'codecell', {}
              return None, None

          def new_cell(self, state, lines, **kwargs):
              """Create the cell for parser ``state`` from ``lines``.

              Returns ``None`` when the cell has no content or ``state`` is not
              recognised.  ``level`` in ``kwargs`` (default 1) is used for
              heading cells.
              """
              if state == u'codecell':
                  source = u'\n'.join(lines).strip(u'\n')
                  if source:
                      return new_code_cell(input=source)
                  return None
              if state == u'headingcell':
                  text = self._remove_comments(lines)
                  level = kwargs.get('level', 1)
                  if text:
                      return new_heading_cell(source=text, level=level)
                  return None
              cell_type = self._TEXT_CELL_TYPES.get(state)
              if cell_type is None:
                  return None
              text = self._remove_comments(lines)
              if text:
                  return new_text_cell(cell_type, source=text)
              return None

          def _remove_comments(self, lines):
              """Strip the leading comment prefix from each line and join them.

              ``# `` (hash plus one space) is removed when present; a bare ``#``
              prefix loses only the hash, so ``#text`` keeps its first letter.
              Leading and trailing newlines of the joined text are stripped.
              """
              new_lines = []
              for line in lines:
                  if line.startswith(u'# '):
                      new_lines.append(line[2:])
                  elif line.startswith(u'#'):
                      new_lines.append(line[1:])
                  else:
                      new_lines.append(line)
              return u'\n'.join(new_lines).strip(u'\n')

          def split_lines_into_blocks(self, lines):
              """Yield ``lines`` regrouped into one string per top-level statement.

              A single-line input is yielded unchanged.  Otherwise the joined
              source is parsed with ``ast`` and cut at the first line of every
              top-level statement; each chunk is stripped of leading and
              trailing newlines.  Input without any statement is yielded as a
              single chunk when it is non-empty.
              """
              if len(lines) == 1:
                  yield lines[0]
                  # Plain return: raising StopIteration inside a generator is
                  # converted to RuntimeError under PEP 479.
                  return
              import ast
              source = '\n'.join(lines)
              starts = [node.lineno - 1 for node in ast.parse(source).body]
              if not starts:
                  # Empty or comment-only input has no statement to split on.
                  remainder = source.strip('\n')
                  if remainder:
                      yield remainder
                  return
              for begin, end in zip(starts, starts[1:]):
                  yield '\n'.join(lines[begin:end]).strip('\n')
              yield '\n'.join(lines[starts[-1]:]).strip('\n')

      class PyWriter(NotebookWriter):
          """Serialise a notebook as ``.py`` source with cell-marker comments."""

          # cell_type -> marker line, for cells whose source is written as comments.
          _COMMENT_MARKERS = {
              u'html': u'# <htmlcell>',
              u'markdown': u'# <markdowncell>',
              u'raw': u'# <rawcell>',
          }

          def writes(self, nb, **kwargs):
              """Return the ``.py`` representation of ``nb`` as a unicode string.

              The output starts with an encoding declaration and an
              ``# <nbformat>`` line.  Each cell of each worksheet follows as a
              marker line, a blank line, the cell body and a blank line.  Code
              cells are written verbatim; html, markdown, raw and heading cells
              have every line prefixed with ``# ``.  Cells of any other type,
              and cells whose body is ``None``, are skipped.  ``kwargs`` is
              not used.
              """
              lines = [
                  u'# -*- coding: utf-8 -*-',
                  u'# <nbformat>%i.%i</nbformat>' % (nbformat, nbformat_minor),
                  u'',
              ]
              for ws in nb.worksheets:
                  for cell in ws.cells:
                      cell_type = cell.cell_type
                      if cell_type == u'code':
                          source = cell.get(u'input')
                          if source is not None:
                              lines.extend([u'# <codecell>', u''])
                              lines.extend(source.splitlines())
                              lines.append(u'')
                          continue
                      if cell_type == u'heading':
                          marker = u'# <headingcell level=%s>' % cell.get(u'level', 1)
                      else:
                          marker = self._COMMENT_MARKERS.get(cell_type)
                          if marker is None:
                              continue
                      source = cell.get(u'source')
                      if source is None:
                          continue
                      lines.extend([marker, u''])
                      lines.extend([u'# ' + line for line in source.splitlines()])
                      lines.append(u'')
              lines.append(u'')
              # Joining with a unicode separator already yields unicode, so the
              # Python-2-only ``unicode()`` builtin is not needed.
              return u'\n'.join(lines)

      # Shared reader/writer instances backing the module-level functions.
      _reader, _writer = PyReader(), PyWriter()

      # Module-level aliases bound to the methods of the shared instances.
      reads, read, to_notebook = _reader.reads, _reader.read, _reader.to_notebook
      write, writes = _writer.write, _writer.writes

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

				"""Read and write notebooks as regular .py files.

				Authors:

				* Brian Granger
				"""

				#-----------------------------------------------------------------------------
				# Copyright (C) 2008-2011 The IPython Development Team
				#
				# Distributed under the terms of the BSD License. The full license is in
				# the file COPYING, distributed as part of this software.
				#-----------------------------------------------------------------------------

				#-----------------------------------------------------------------------------
				# Imports
				#-----------------------------------------------------------------------------

				import re
				from .rwbase import NotebookReader, NotebookWriter
				from .nbbase import (
				new_code_cell, new_text_cell, new_worksheet,
				new_notebook, new_heading_cell, nbformat, nbformat_minor,
				)

				#-----------------------------------------------------------------------------
				# Code
				#-----------------------------------------------------------------------------

				_encoding_declaration_re = re.compile(r"^#.coding[:=]\s([-\w.]+)")

				class PyReaderError(Exception):
				    """Exception type declared for errors in the ``.py`` notebook reader."""


				class PyReader(NotebookReader):
				    """Build a notebook from ``.py`` source annotated with cell markers.

				    Cell boundaries are comment lines such as ``# <codecell>`` or
				    ``# <markdowncell>``; the lines between two markers form one cell.
				    """

				    # Marker prefix -> parser state, for markers that carry no arguments.
				    _MARKER_STATES = (
				        (u'# <codecell>', u'codecell'),
				        (u'# <htmlcell>', u'htmlcell'),
				        (u'# <markdowncell>', u'markdowncell'),
				        (u'# <rawcell>', u'rawcell'),
				        # VERSIONHACK: plaintext -> raw
				        (u'# <plaintextcell>', u'rawcell'),
				    )

				    # Parser state -> cell type handed to ``new_text_cell``.
				    _TEXT_CELL_TYPES = {
				        u'htmlcell': u'html',
				        u'markdowncell': u'markdown',
				        u'rawcell': u'raw',
				    }

				    def reads(self, s, **kwargs):
				        """Return the notebook parsed from the string ``s``."""
				        return self.to_notebook(s, **kwargs)

				    def to_notebook(self, s, **kwargs):
				        """Parse ``s`` into a notebook holding a single worksheet.

				        Lines are gathered into the current cell until the next cell
				        marker; text before the first marker is treated as code.
				        ``# <nbformat>`` lines are skipped, as is an encoding declaration
				        on the first two lines.  ``kwargs`` is accepted for interface
				        compatibility and is not used.
				        """
				        cells = []
				        pending = []        # lines gathered for the cell being read
				        cell_kwargs = {}    # extra arguments for that cell (heading level)
				        state = u'codecell'

				        def flush():
				            # Turn the gathered lines into a cell; new_cell returns None
				            # for a cell without content, which is then dropped.
				            cell = self.new_cell(state, pending, **cell_kwargs)
				            if cell is not None:
				                cells.append(cell)

				        for lineno, line in enumerate(s.splitlines()):
				            if line.startswith(u'# <nbformat>'):
				                continue
				            # An encoding declaration only counts on the first two lines
				            # (PEP 263); a later look-alike comment is cell content and
				            # must not be silently dropped.
				            if lineno < 2 and _encoding_declaration_re.match(line):
				                continue
				            new_state, new_kwargs = self._parse_marker(line)
				            if new_state is None:
				                pending.append(line)
				                continue
				            flush()
				            state, cell_kwargs, pending = new_state, new_kwargs, []

				        # Flush the cell still open at end of input, whatever its type,
				        # so that a trailing text or heading cell is not lost.
				        flush()

				        ws = new_worksheet(cells=cells)
				        return new_notebook(worksheets=[ws])

				    def _parse_marker(self, line):
				        """Classify ``line`` as a cell marker.

				        Returns ``(state, cell_kwargs)`` for a marker line and
				        ``(None, None)`` for ordinary content.
				        """
				        for prefix, state in self._MARKER_STATES:
				            if line.startswith(prefix):
				                return state, {}
				        if line.startswith(u'# <headingcell'):
				            m = re.match(r'# <headingcell level=(?P<level>\d)>', line)
				            if m is not None:
				                return u'headingcell', {'level': int(m.group('level'))}
				            # A heading marker without a parsable level starts a code cell.
				            return u'codecell', {}
				        return None, None

				    def new_cell(self, state, lines, **kwargs):
				        """Create the cell for parser ``state`` from ``lines``.

				        Returns ``None`` when the cell has no content or ``state`` is not
				        recognised.  ``level`` in ``kwargs`` (default 1) is used for
				        heading cells.
				        """
				        if state == u'codecell':
				            source = u'\n'.join(lines).strip(u'\n')
				            if source:
				                return new_code_cell(input=source)
				            return None
				        if state == u'headingcell':
				            text = self._remove_comments(lines)
				            level = kwargs.get('level', 1)
				            if text:
				                return new_heading_cell(source=text, level=level)
				            return None
				        cell_type = self._TEXT_CELL_TYPES.get(state)
				        if cell_type is None:
				            return None
				        text = self._remove_comments(lines)
				        if text:
				            return new_text_cell(cell_type, source=text)
				        return None

				    def _remove_comments(self, lines):
				        """Strip the leading comment prefix from each line and join them.

				        ``# `` (hash plus one space) is removed when present; a bare ``#``
				        prefix loses only the hash, so ``#text`` keeps its first letter.
				        Leading and trailing newlines of the joined text are stripped.
				        """
				        new_lines = []
				        for line in lines:
				            if line.startswith(u'# '):
				                new_lines.append(line[2:])
				            elif line.startswith(u'#'):
				                new_lines.append(line[1:])
				            else:
				                new_lines.append(line)
				        return u'\n'.join(new_lines).strip(u'\n')

				    def split_lines_into_blocks(self, lines):
				        """Yield ``lines`` regrouped into one string per top-level statement.

				        A single-line input is yielded unchanged.  Otherwise the joined
				        source is parsed with ``ast`` and cut at the first line of every
				        top-level statement; each chunk is stripped of leading and
				        trailing newlines.  Input without any statement is yielded as a
				        single chunk when it is non-empty.
				        """
				        if len(lines) == 1:
				            yield lines[0]
				            # Plain return: raising StopIteration inside a generator is
				            # converted to RuntimeError under PEP 479.
				            return
				        import ast
				        source = '\n'.join(lines)
				        starts = [node.lineno - 1 for node in ast.parse(source).body]
				        if not starts:
				            # Empty or comment-only input has no statement to split on.
				            remainder = source.strip('\n')
				            if remainder:
				                yield remainder
				            return
				        for begin, end in zip(starts, starts[1:]):
				            yield '\n'.join(lines[begin:end]).strip('\n')
				        yield '\n'.join(lines[starts[-1]:]).strip('\n')


				class PyWriter(NotebookWriter):
				    """Serialise a notebook as ``.py`` source with cell-marker comments."""

				    # cell_type -> marker line, for cells whose source is written as comments.
				    _COMMENT_MARKERS = {
				        u'html': u'# <htmlcell>',
				        u'markdown': u'# <markdowncell>',
				        u'raw': u'# <rawcell>',
				    }

				    def writes(self, nb, **kwargs):
				        """Return the ``.py`` representation of ``nb`` as a unicode string.

				        The output starts with an encoding declaration and an
				        ``# <nbformat>`` line.  Each cell of each worksheet follows as a
				        marker line, a blank line, the cell body and a blank line.  Code
				        cells are written verbatim; html, markdown, raw and heading cells
				        have every line prefixed with ``# ``.  Cells of any other type,
				        and cells whose body is ``None``, are skipped.  ``kwargs`` is
				        not used.
				        """
				        lines = [
				            # Conventional ``-*-`` form of the encoding declaration.
				            u'# -*- coding: utf-8 -*-',
				            u'# <nbformat>%i.%i</nbformat>' % (nbformat, nbformat_minor),
				            u'',
				        ]
				        for ws in nb.worksheets:
				            for cell in ws.cells:
				                cell_type = cell.cell_type
				                if cell_type == u'code':
				                    source = cell.get(u'input')
				                    if source is not None:
				                        lines.extend([u'# <codecell>', u''])
				                        lines.extend(source.splitlines())
				                        lines.append(u'')
				                    continue
				                if cell_type == u'heading':
				                    marker = u'# <headingcell level=%s>' % cell.get(u'level', 1)
				                else:
				                    marker = self._COMMENT_MARKERS.get(cell_type)
				                    if marker is None:
				                        continue
				                source = cell.get(u'source')
				                if source is None:
				                    continue
				                lines.extend([marker, u''])
				                lines.extend([u'# ' + line for line in source.splitlines()])
				                lines.append(u'')
				        lines.append(u'')
				        # Joining with a unicode separator already yields unicode, so the
				        # Python-2-only ``unicode()`` builtin is not needed.
				        return u'\n'.join(lines)


				# Shared reader/writer instances backing the module-level functions.
				_reader, _writer = PyReader(), PyWriter()

				# Module-level aliases bound to the methods of the shared instances.
				reads, read, to_notebook = _reader.reads, _reader.read, _reader.to_notebook
				write, writes = _writer.write, _writer.writes