"""Base classes for the notebook conversion pipeline.

This module defines Converter, from which all objects designed to implement
a conversion of IPython notebooks to some other format should inherit.
"""
#-----------------------------------------------------------------------------
# Copyright (c) 2012, the IPython Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------

#-----------------------------------------------------------------------------
# Imports
#-----------------------------------------------------------------------------

from __future__ import print_function, absolute_import

# Stdlib imports
import codecs
import io
import logging
import os
import pprint
import re
from types import FunctionType

# IPython imports
from IPython.nbformat import current as nbformat
from IPython.config.configurable import Configurable, SingletonConfigurable
from IPython.utils.traitlets import List, Unicode, Type, Bool, Dict, CaselessStrEnum

# Our own imports
from .utils import remove_fake_files_url


#-----------------------------------------------------------------------------
# Local utilities
#-----------------------------------------------------------------------------

def clean_filename(filename):
    """
    Remove non-alphanumeric characters from filenames.

    Parameters
    ----------
    filename : str
        The filename to be sanitized.

    Returns
    -------
    clean : str
        A sanitized filename that contains only alphanumeric
        characters and underscores.
    """
    filename = re.sub(r'[^a-zA-Z0-9_]', '_', filename)
    return filename


#-----------------------------------------------------------------------------
# Class declarations
#-----------------------------------------------------------------------------

class ConversionException(Exception):
    pass


class DocStringInheritor(type):
    """
    This metaclass will walk the list of bases until the desired
    superclass method is found AND if that method has a docstring and only
    THEN does it attach the superdocstring to the derived class method.

    Please use carefully, I just did the metaclass thing by following
    Michael Foord's Metaclass tutorial
    (http://www.voidspace.org.uk/python/articles/metaclasses.shtml), I may
    have missed a step or two.

    source:
    http://groups.google.com/group/comp.lang.python/msg/26f7b4fcb4d66c95
    by Paul McGuire
    """
    def __new__(meta, classname, bases, classDict):
        newClassDict = {}
        for attributeName, attribute in classDict.items():
            if type(attribute) == FunctionType:
                # look through bases for matching function by name
                for baseclass in bases:
                    if hasattr(baseclass, attributeName):
                        basefn = getattr(baseclass, attributeName)
                        if basefn.__doc__:
                            attribute.__doc__ = basefn.__doc__
                            break
            newClassDict[attributeName] = attribute
        return type.__new__(meta, classname, bases, newClassDict)


class Converter(Configurable):
    #__metaclass__ = DocStringInheritor
    #-------------------------------------------------------------------------
    # Class-level attributes determining the behaviour of the class but
    # probably not varying from instance to instance.
    #-------------------------------------------------------------------------
    default_encoding = 'utf-8'
    extension = str()
    blank_symbol = " "
    # Which display data format is best? Subclasses can override if
    # they have specific requirements.
    display_data_priority = ['pdf', 'svg', 'png', 'jpg', 'text']
    #-------------------------------------------------------------------------
    # Instance-level attributes that are set in the constructor for this
    # class.
    #-------------------------------------------------------------------------
    infile = Unicode()

    highlight_source = Bool(True,
                     config=True,
                     help="Enable syntax highlighting for code blocks.")

    preamble = Unicode("" ,
                        config=True,
                        help="Path to a user-specified preamble file")

    infile_dir = Unicode()
    infile_root = Unicode()
    clean_name = Unicode()
    files_dir = Unicode()
    outbase = Unicode()
    #-------------------------------------------------------------------------
    # Instance-level attributes that are set by other methods in the base
    # class.
    #-------------------------------------------------------------------------
    figures_counter = 0
    output = Unicode()
    #-------------------------------------------------------------------------
    # Instance-level attributes that are not actually mentioned further
    # in this class. TODO: Could they be usefully moved to a subclass?
    #-------------------------------------------------------------------------
    with_preamble = Bool(True,config=True)
    user_preamble = None
    raw_as_verbatim = False


    def __init__(self, infile='', config=None, exclude=[], **kw):
        super(Converter,self).__init__(config=config)

        #DocStringInheritor.__init__(self=config)
        # N.B. Initialized in the same order as defined above. Please try to
        # keep in this way for readability's sake.
        self.exclude_cells = exclude
        self.infile = infile
        self.infile_dir, infile_root = os.path.split(infile)
        self.infile_root = os.path.splitext(infile_root)[0]
        self.clean_name = clean_filename(self.infile_root)
        # Handle the creation of a directory for ancillary files, for
        # formats that need one.
        files_dir = os.path.join(self.infile_dir, self.clean_name + '_files')
        if not os.path.isdir(files_dir):
            os.mkdir(files_dir)
        self.files_dir = files_dir
        self.outbase = os.path.join(self.infile_dir, self.infile_root)

    def __del__(self):
        if os.path.isdir(self.files_dir) and not os.listdir(self.files_dir):
            os.rmdir(self.files_dir)

    def _get_prompt_number(self, cell):
        return cell.prompt_number if hasattr(cell, 'prompt_number') \
            else self.blank_symbol

    def dispatch(self, cell_type):
        """return cell_type dependent render method,  for example render_code
        """
        return getattr(self, 'render_' + cell_type, self.render_unknown)

    def dispatch_display_format(self, format):
        """
        return output_type dependent render method,  for example
        render_output_text
        """
        return getattr(self, 'render_display_format_' + format,
                       self.render_unknown_display)

    def convert(self, cell_separator='\n'):
        """
        Generic method to converts notebook to a string representation.

        This is accomplished by dispatching on the cell_type, so subclasses of
        Convereter class do not need to re-implement this method, but just
        need implementation for the methods that will be dispatched.

        Parameters
        ----------
        cell_separator : string
          Character or string to join cells with. Default is "\n"

        Returns
        -------
        out : string
        """
        lines = []
        lines.extend(self.optional_header())
        lines.extend(self.main_body(cell_separator))
        lines.extend(self.optional_footer())
        return u'\n'.join(lines)

    def main_body(self, cell_separator='\n'):
        converted_cells = []
        for worksheet in self.nb.worksheets:
            for cell in worksheet.cells:
                #print(cell.cell_type)  # dbg
                conv_fn = self.dispatch(cell.cell_type)
                if cell.cell_type in ('markdown', 'raw'):
                    remove_fake_files_url(cell)
                converted_cells.append('\n'.join(conv_fn(cell)))
        cell_lines = cell_separator.join(converted_cells).split('\n')
        return cell_lines

    def render(self):
        "read, convert, and save self.infile"
        if not hasattr(self, 'nb'):
            self.read()
        self.output = self.convert()
        assert(type(self.output) == unicode)
        return self.save()

    def read(self):
        "read and parse notebook into NotebookNode called self.nb"
        with open(self.infile) as f:
            self.nb = nbformat.read(f, 'json')

    def save(self, outfile=None, encoding=None):
        "read and parse notebook into self.nb"
        if outfile is None:
            outfile = self.outbase + '.' + self.extension
        if encoding is None:
            encoding = self.default_encoding
        with io.open(outfile, 'w', encoding=encoding) as f:
            f.write(self.output)
        return os.path.abspath(outfile)

    def optional_header(self):
        """
        Optional header to insert at the top of the converted notebook

        Returns a list
        """
        return []

    def optional_footer(self):
        """
        Optional footer to insert at the end of the converted notebook

        Returns a list
        """
        return []

    def _new_figure(self, data, fmt):
        """Create a new figure file in the given format.

        Returns a path relative to the input file.
        """
        figname = '%s_fig_%02i.%s' % (self.clean_name,
                                      self.figures_counter, fmt)
        self.figures_counter += 1
        fullname = os.path.join(self.files_dir, figname)

        # Binary files are base64-encoded, SVG is already XML
        if fmt in ('png', 'jpg', 'pdf'):
            data = data.decode('base64')
            fopen = lambda fname: open(fname, 'wb')
        else:
            fopen = lambda fname: codecs.open(fname, 'wb',
                                              self.default_encoding)

        with fopen(fullname) as f:
            f.write(data)

        return fullname

    def render_heading(self, cell):
        """convert a heading cell

        Returns list."""
        raise NotImplementedError

    def render_code(self, cell):
        """Convert a code cell

        Returns list."""
        raise NotImplementedError

    def render_markdown(self, cell):
        """convert a markdown cell

        Returns list."""
        raise NotImplementedError

    def _img_lines(self, img_file):
        """Return list of lines to include an image file."""
        # Note: subclasses may choose to implement format-specific _FMT_lines
        # methods if they so choose (FMT in {png, svg, jpg, pdf}).
        raise NotImplementedError

    def render_display_data(self, output):
        """convert display data from the output of a code cell

        Returns list.
        """
        for fmt in self.display_data_priority:
            if fmt in output:
                break
        else:
            for fmt in output:
                if fmt != 'output_type':
                    break
            else:
                raise RuntimeError('no display data')

        # Is it an image?
        if fmt in ['png', 'svg', 'jpg', 'pdf']:
            img_file = self._new_figure(output[fmt], fmt)
            # Subclasses can have format-specific render functions (e.g.,
            # latex has to auto-convert all SVG to PDF first).
            lines_fun = getattr(self, '_%s_lines' % fmt, None)
            if not lines_fun:
                lines_fun = self._img_lines
            lines = lines_fun(img_file)
        else:
            lines_fun = self.dispatch_display_format(fmt)
            lines = lines_fun(output)

        return lines

    def render_raw(self, cell):
        """convert a cell with raw text

        Returns list."""
        raise NotImplementedError

    def render_unknown(self, cell):
        """Render cells of unkown type

        Returns list."""
        data = pprint.pformat(cell)
        logging.warning('Unknown cell: %s' % cell.cell_type)
        return self._unknown_lines(data)

    def render_unknown_display(self, output, type):
        """Render cells of unkown type

        Returns list."""
        data = pprint.pformat(output)
        logging.warning('Unknown output: %s' % output.output_type)
        return self._unknown_lines(data)

    def render_stream(self, output):
        """render the stream part of an output

        Returns list.

        Identical to render_display_format_text
        """
        return self.render_display_format_text(output)

    def render_pyout(self, output):
        """convert pyout part of a code cell

        Returns list."""
        raise NotImplementedError

    def render_pyerr(self, output):
        """convert pyerr part of a code cell

        Returns list."""
        raise NotImplementedError

    def _unknown_lines(self, data):
        """Return list of lines for an unknown cell.

        Parameters
        ----------
        data : str
          The content of the unknown data as a single string.
        """
        raise NotImplementedError

    # These are the possible format types in an output node

    def render_display_format_text(self, output):
        """render the text part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_html(self, output):
        """render the html part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_latex(self, output):
        """render the latex part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_json(self, output):
        """render the json part of an output

        Returns list.
        """
        raise NotImplementedError

    def render_display_format_javascript(self, output):
        """render the javascript part of an output

        Returns list.
        """
        raise NotImplementedError