upstream/ipython Commit - r15057:b2bbfc5c

Import HTMLParser from html.parser for Python 3.x

Jonathan Frederic -

r15057:b2bbfc5c

parent child

IPython/nbconvert/filters/citation.py

0 +4 0

              """Citation handling for LaTeX output."""
              #-----------------------------------------------------------------------------
              # Copyright (c) 2013, the IPython Development Team.
              #
              # Distributed under the terms of the Modified BSD License.
              #
              # The full license is in the file COPYING.txt, distributed with this software.
              #-----------------------------------------------------------------------------
              #-----------------------------------------------------------------------------
              # Imports
              #-----------------------------------------------------------------------------
+             from IPython.utils.py3compat import PY3
+             if PY3:
+                 from html.parser import HTMLParser
+             else:
              from HTMLParser import HTMLParser
              #-----------------------------------------------------------------------------
              # Functions
              #-----------------------------------------------------------------------------
              __all__ = ['citation2latex']
              def citation2latex(s):
                  """Replace HTML citation elements in a Markdown cell by LaTeX cites.

                  Every HTML element carrying a ``data-cite`` attribute is replaced,
                  tags and content included, by a call to the LaTeX cite command
                  using the attribute value as key. The transformation looks like
                  this:
                  `<cite data-cite="granger">(Granger, 2013)</cite>`
                  Becomes
                  `\\cite{granger}`
                  Any HTML tag can be used, which allows the citations to be formatted
                  in HTML in any manner.

                  If a citation element is never closed, everything following its
                  start tag is dropped from the output.
                  """
                  parser = CitationParser()
                  parser.feed(s)
                  parser.close()

                  pieces = []
                  cursor = 0
                  for entry in parser.citelist:
                      # text between the previous citation and this one, then the cite
                      pieces.append(s[cursor:entry[1]])
                      pieces.append('\\cite{%s}'%entry[0])
                      if len(entry) == 3:
                          # entry is [key, start, end]: resume copying after the element
                          cursor = entry[2]
                      else:
                          # no end offset was recorded for this citation
                          cursor = -1
                  if cursor != -1:
                      pieces.append(s[cursor:])
                  return u''.join(pieces)
              #-----------------------------------------------------------------------------
              # Classes
              #-----------------------------------------------------------------------------
              class CitationParser(HTMLParser):
                  """Citation Parser

                  Locates html elements whose start tag has a data-cite attribute so
                  that they can be replaced by the respective latex \\cite.

                  After feeding, ``citelist`` holds one entry per citation found:
                  ``[key, start]`` while the element is still open and
                  ``[key, start, end]`` once its matching end tag has been seen.
                  ``key`` is the value of the data-cite attribute; ``start`` and
                  ``end`` are offsets into the fed string delimiting the whole
                  element, tags included. Elements nested inside a citation are not
                  recorded as separate citations.

                  Inherits from HTMLParser, overrides:
                   - handle_starttag
                   - handle_endtag
                   - feed
                  """
                  # nesting depth of the active citation tag (0 = not inside a citation)
                  opentags = None
                  # list of found citations
                  citelist = None
                  # tag name of the most recently opened citation element
                  citetag = None

                  def __init__(self):
                      self.citelist = []
                      self.opentags = 0
                      HTMLParser.__init__(self)

                  def get_offset(self):
                      """Return the current parser position as an offset into self.data."""
                      # getpos() reports (line, column); walk over the preceding
                      # newlines to turn that into a flat string index.
                      lin, offset = self.getpos()
                      pos = 0
                      for _ in range(lin - 1):
                          pos = self.data.find('\n', pos) + 1
                      return pos + offset

                  def handle_starttag(self, tag, attrs):
                      """Open a citation on a data-cite tag, or track nesting inside one."""
                      if self.opentags == 0:
                          # Not inside a citation: only a tag with data-cite matters.
                          for atr, data in attrs:
                              if atr.lower() == 'data-cite':
                                  self.citetag = tag
                                  self.opentags = 1
                                  self.citelist.append([data, self.get_offset()])
                                  return
                          # citetag may still name an already closed citation; a plain
                          # tag of that name must not be counted as belonging to it.
                          return
                      if tag == self.citetag:
                          # found an open citation tag but not the starting one
                          self.opentags += 1

                  def handle_endtag(self, tag):
                      """Close the active citation when its matching end tag is reached."""
                      if self.opentags == 0 or tag != self.citetag:
                          # Either no citation is active or this tag is unrelated to it.
                          return
                      if self.opentags == 1:
                          # This end tag closes the citation itself: record where the
                          # element ends. Look for the real '>' so that end tags written
                          # with inner whitespace (e.g. `</cite >`) are fully covered.
                          start = self.get_offset()
                          close = self.data.find('>', start)
                          if close != -1:
                              end = close + 1
                          else:
                              # fall back to the length of a plain `</tag>`
                              end = start + len(tag) + 3
                          self.citelist[-1].append(end)
                      self.opentags -= 1

                  def feed(self, data):
                      """Remember the source text for offset computation, then parse it."""
                      # NOTE: offsets are computed against the most recently fed string
                      # only, so the whole document is expected in a single feed() call.
                      self.data = data
                      HTMLParser.feed(self, data)

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages