diff --git a/IPython/nbconvert/filters/citation.py b/IPython/nbconvert/filters/citation.py
index 1442d55..2ecd717 100644
--- a/IPython/nbconvert/filters/citation.py
+++ b/IPython/nbconvert/filters/citation.py
@@ -9,9 +9,17 @@
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
-# Code
+# Imports
#-----------------------------------------------------------------------------
+from IPython.utils.py3compat import PY3
+if PY3:
+ from html.parser import HTMLParser
+else:
+ from HTMLParser import HTMLParser
+#-----------------------------------------------------------------------------
+# Functions
+#-----------------------------------------------------------------------------
__all__ = ['citation2latex']
@@ -32,41 +40,72 @@ def citation2latex(s):
Any HTML tag can be used, which allows the citations to be formatted
in HTML in any manner.
"""
- try:
- from lxml import html
- except ImportError:
- return s
-
- tree = html.fragment_fromstring(s, create_parent='div')
- _process_node_cite(tree)
- s = html.tostring(tree, encoding='unicode')
- if s.endswith('</div>'):
- s = s[:-6]
- if s.startswith('<div>'):
- s = s[5:]
- return s
+ # Let the parser record every citation as [key, start] or, once the
+ # matching end tag was seen, [key, start, end] (offsets into s).
+ parser = CitationParser()
+ parser.feed(s)
+ parser.close()
+ outtext = u''
+ # Offset in s of the first character not yet copied to outtext.
+ startpos = 0
+ for citation in parser.citelist:
+ # Copy the text preceding the citing tag, then emit the \cite command
+ # in place of the tag and everything it encloses.
+ outtext += s[startpos:citation[1]]
+ outtext += '\\cite{%s}'%citation[0]
+ # An entry without exactly three items has no usable end offset;
+ # -1 marks the rest of s as consumed by that citation.
+ startpos = citation[2] if len(citation)==3 else -1
+ # Append the remaining text unless an unterminated citation swallowed it.
+ outtext += s[startpos:] if startpos != -1 else ''
+ return outtext
+#-----------------------------------------------------------------------------
+# Classes
+#-----------------------------------------------------------------------------
+class CitationParser(HTMLParser):
+ """Citation Parser
-def _process_node_cite(node):
- """Do the citation replacement as we walk the lxml tree."""
+ Locates HTML tags carrying a data-cite attribute so that they can be replaced with the corresponding LaTeX \\cite.
- def _get(o, name):
- value = getattr(o, name, None)
- return '' if value is None else value
+ Inherits from HTMLParser, overrides:
+ - handle_starttag
+ - handle_endtag
+ """
+ # number of open tags
+ opentags = None
+ # list of found citations
+ citelist = None
+ # active citation tag
+ citetag = None
+
+ def __init__(self):
+ # Per-instance state: no citation recorded yet, no citing tag open.
+ self.citelist = []
+ self.opentags = 0
+ # Explicit base-class call rather than super(); presumably because the
+ # Python 2 HTMLParser is an old-style class -- TODO confirm.
+ HTMLParser.__init__(self)
- if 'data-cite' in node.attrib:
- cite = '\cite{%(ref)s}' % {'ref': node.attrib['data-cite']}
- prev = node.getprevious()
- if prev is not None:
- prev.tail = _get(prev, 'tail') + cite + _get(node, 'tail')
- else:
- parent = node.getparent()
- if parent is not None:
- parent.text = _get(parent, 'text') + cite + _get(node, 'tail')
- try:
- node.getparent().remove(node)
- except AttributeError:
- pass
- else:
- for child in node:
- _process_node_cite(child)
+ def get_offset(self):
+ # Convert the parser's current (line, column) position from getpos()
+ # into an absolute character index into self.data.
+ lin, offset = self.getpos()
+ pos = 0
+ # lin is treated as 1-based: step past lin-1 newlines so that pos ends
+ # up at the first character of the current line.
+ for i in range(lin-1):
+ pos = self.data.find('\n',pos) + 1
+ return pos + offset
+
+ def handle_starttag(self, tag, attrs):
+ # Start a new citation only when none is currently open and the tag
+ # has attributes at all.
+ if self.opentags == 0 and len(attrs)>0:
+ for atr, data in attrs:
+ # The attribute name is compared case-insensitively.
+ if atr.lower() == 'data-cite':
+ # Remember the tag name so nested tags of the same name can be
+ # counted, and record [citation key, offset where the tag starts].
+ self.citetag = tag
+ self.opentags = 1
+ self.citelist.append([data, self.get_offset()])
+ return
+
+ # NOTE(review): citetag is not reset when a citation closes, so a later
+ # tag of the same name without data-cite also reaches this branch --
+ # confirm that is intended.
+ if tag == self.citetag:
+ # Same tag name as the citing tag but not the one that started the
+ # citation: count it so its end tag is not mistaken for the closing one.
+ self.opentags += 1
+
+ def handle_endtag(self, tag):
+ # Only end tags named like the citing tag are of interest.
+ if tag == self.citetag:
+ # A counter of 1 means this end tag closes the tag that started the
+ # citation, so the end offset is appended to the newest entry.
+ if self.opentags == 1:
+ pos = self.get_offset()
+ # len(tag)+3 is the length of an end tag written as </tag>.
+ # NOTE(review): an end tag with extra whitespace (e.g. "</tag >")
+ # is longer than that -- confirm whether this case matters.
+ self.citelist[-1].append(pos+len(tag)+3)
+ # One fewer tag of this name remains open.
+ self.opentags -= 1
+
+ def feed(self, data):
+ # Keep a reference to the fed text: get_offset() searches self.data to
+ # turn line/column positions into absolute offsets.
+ # NOTE(review): self.data is overwritten on every call, so the offsets
+ # appear to assume the whole text arrives in a single feed() -- confirm.
+ self.data = data
+ HTMLParser.feed(self, data)
diff --git a/IPython/nbconvert/filters/tests/test_citation.py b/IPython/nbconvert/filters/tests/test_citation.py
index f36c9ac..3fc898c 100644
--- a/IPython/nbconvert/filters/tests/test_citation.py
+++ b/IPython/nbconvert/filters/tests/test_citation.py
@@ -9,15 +9,13 @@
#-----------------------------------------------------------------------------
# Imports
#-----------------------------------------------------------------------------
-
from ..citation import citation2latex
from nose.tools import assert_equal
#-----------------------------------------------------------------------------
# Tests
#-----------------------------------------------------------------------------
-
-test_md = """
+test_md = {"""
# My Heading
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Phasellus ac magna non augue
@@ -26,14 +24,13 @@ velit, lobortis sed interdum at, vestibulum vitae libero
Thomas. Quisque iaculis ligula ut ipsum mattis viverra.
-Here is a plain paragraph that should be unaffected.
+Here is a plain paragraph that should be unaffected. It contains simple
+relations like 1<2 & 4>5.
* One Jonathan.
* Two Matthias.
* Three Paul.
-"""
-
-test_md_parsed = """
+""": """
# My Heading
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Phasellus ac magna non augue
@@ -42,18 +39,112 @@ velit, lobortis sed interdum at, vestibulum vitae libero \cite{fperez}.
Lorem ipsum dolor sit amet, consectetur adipiscing elit
\cite{takluyver}. Quisque iaculis ligula ut ipsum mattis viverra.
-Here is a plain paragraph that should be unaffected.
+Here is a plain paragraph that should be unaffected. It contains simple
+relations like 1<2 & 4>5.
* One \cite{jdfreder}.
* Two \cite{carreau}.
* Three \cite{ivanov}.
-"""
+""",
+
+# No citations
+r"""The quick brown fox jumps over the lazy dog.""":
+r"""The quick brown fox jumps over the lazy dog.""",
+
+# Simple inline
+r"""Foo Text bar""":
+r"""Foo \cite{asdf} bar""",
+
+# Multiline
+r"""Text
+Foo""":
+r"""\cite{ewqr}Foo""",
+
+# Nested tags
+r""" Bar""":
+r"""\cite{Foo}
Bar""",
+
+# Including Maths
+r"""Foo $3*2*1$ Text
Bar""":
+r"""Foo $3*2*1$ \cite{Foo} Bar""",
+
+# Missing end tag
+r"""Test Foo""":
+r"""\cite{asdf}""",
+
+r"""Test Foo""":
+r"""\cite{asdf}""",
+
+r"""Test Foo""":
+r"""\cite{asdf}""",
+
+# Multiple arguments
+r"""Test Foo""":
+r"""\cite{asdf} Foo""",
+
+# Wrong capitalization
+r"""Test Foo""":
+r"""\cite{asdf} Foo""",
+
+r"""Test Foo""":
+r"""\cite{asdf} Foo""",
+
+# Wrong end tag
+r""" ksjfs sdf ds """:
+r"""\cite{wer}""",
+
+r"""""":
+r"""\cite{wer}""",
+
+# Invalid tag names
+r""" """:
+r""" \cite{wer}""",
+
+# Non-nested tags
+r""" Test
Foo """:
+r""" \cite{asdf}Test
Foo """,
+
+# LXML errors
+r"""Foo
+\begin{eqnarray}
+1 & bar1 \\
+3 & 4 \\
+\end{eqnarray}""":
+r"""Foo
+\begin{eqnarray}
+1 & \cite{bar} \\
+3 & 4 \\
+\end{eqnarray}""",
+
+r"""
+1<2 is true, but 3>4 is false.
+
+$1<2$ is true, but $3>4$ is false.
+
+1<2 it is even worse if it is alone in a line.""":
+r"""
+1<2 is true, but 3>4 is false.
+
+$1<2$ is true, but $3>4$ is false.
+
+1<2 it is even worse if it is alone in a line.""",
+
+r"""
+1 < 2 is true, but 3 > 4 is false
+
+$1 < 2$ is true, but $3 > 4$ is false
+
+1 < 2 it is even worse if it is alone in a line.
+""":
+r"""
+1 < 2 is true, but 3 > 4 is false
+
+$1 < 2$ is true, but $3 > 4$ is false
+
+1 < 2 it is even worse if it is alone in a line.
+"""}
def test_citation2latex():
"""Are citations parsed properly?"""
- try:
- from lxml import html #analysis:ignore
- except ImportError:
- assert_equal(test_md, citation2latex(test_md))
- else:
- assert_equal(test_md_parsed, citation2latex(test_md))
+ # test_md maps each input fragment to its expected conversion; one
+ # (assert_equal, actual, expected) tuple is yielded per pair.
+ # citation2latex() is called here, while the tuple is built, so an
+ # exception it raises surfaces in this generator, not in assert_equal.
+ # NOTE(review): the loop variable `input` shadows the builtin of that name.
+ for input, output in test_md.items():
+ yield (assert_equal, citation2latex(input), output)