citation.py
111 lines
| 3.5 KiB
| text/x-python
|
PythonLexer
Brian E. Granger
|
r12265 | """Citation handling for LaTeX output.""" | ||
#----------------------------------------------------------------------------- | ||||
# Copyright (c) 2013, the IPython Development Team. | ||||
# | ||||
# Distributed under the terms of the Modified BSD License. | ||||
# | ||||
# The full license is in the file COPYING.txt, distributed with this software. | ||||
#----------------------------------------------------------------------------- | ||||
#----------------------------------------------------------------------------- | ||||
jakobgager
|
r15051 | # Imports | ||
Brian E. Granger
|
r12265 | #----------------------------------------------------------------------------- | ||
Jonathan Frederic
|
r15057 | from IPython.utils.py3compat import PY3 | ||
if PY3: | ||||
from html.parser import HTMLParser | ||||
else: | ||||
from HTMLParser import HTMLParser | ||||
Brian E. Granger
|
r12265 | |||
jakobgager
|
r15051 | #----------------------------------------------------------------------------- | ||
# Functions | ||||
#----------------------------------------------------------------------------- | ||||
Brian E. Granger
|
r12265 | |||
Brian E. Granger
|
r12299 | __all__ = ['citation2latex'] | ||
Brian E. Granger
|
r12265 | |||
Brian E. Granger
|
def citation2latex(s):
    """Replace citation tags in Markdown text with LaTeX cite commands.

    Every HTML element carrying a `data-cite` attribute is swapped, as a
    whole, for a call to the LaTeX cite command.  For example:

    `<cite data-cite="granger">(Granger, 2013)</cite>`

    becomes

    `\\cite{granger}`

    The tag name is irrelevant, so the citation may be marked up in HTML
    in whatever way is convenient.
    """
    parser = CitationParser()
    parser.feed(s)
    parser.close()

    pieces = []
    cursor = 0
    for entry in parser.citelist:
        # Text between the previous citation and this one, then the command.
        pieces.append(s[cursor:entry[1]])
        pieces.append('\\cite{%s}' % entry[0])
        # A three-item entry has an end offset; anything else is treated as
        # a citation without a usable end, flagged with -1.
        if len(entry) == 3:
            cursor = entry[2]
        else:
            cursor = -1

    # Append the trailing text unless the last citation had no end offset.
    if cursor != -1:
        pieces.append(s[cursor:])
    return u''.join(pieces)
Brian E. Granger
|
r12265 | |||
jakobgager
|
r15051 | #----------------------------------------------------------------------------- | ||
# Classes | ||||
#----------------------------------------------------------------------------- | ||||
class CitationParser(HTMLParser):
    """Citation Parser

    Locates html tags with a data-cite attribute so that they can be
    replaced with the respective latex \\cite.

    After parsing, ``citelist`` holds one entry per citation in document
    order.  A closed citation is ``[key, start, end]`` where ``start`` and
    ``end`` are offsets into the fed text delimiting the whole element; a
    citation whose closing tag was never seen is just ``[key, start]``.

    Inherits from HTMLParser, overrides:
     - handle_starttag
     - handle_endtag
     - feed
     - reset
    """
    # nesting depth of tags named like the active citation tag
    opentags = None
    # list of found citations
    citelist = None
    # tag name of the citation currently open, None when no citation is open
    citetag = None
    # all text fed since the last reset; offsets in citelist index into it
    data = None

    def __init__(self):
        self.citelist = []
        self.opentags = 0
        self.citetag = None
        HTMLParser.__init__(self)

    def reset(self):
        # HTMLParser.reset() rewinds the line/offset counters, so the text
        # those counters refer to has to be rewound with them.
        HTMLParser.reset(self)
        self.data = ''

    def get_offset(self):
        """Return the current parser position as an offset into ``data``."""
        lin, offset = self.getpos()
        pos = 0
        # Skip the (lin - 1) complete lines preceding the current one.
        for _ in range(lin - 1):
            pos = self.data.find('\n', pos) + 1
        return pos + offset

    def handle_starttag(self, tag, attrs):
        if self.opentags == 0:
            # No citation is active: a data-cite attribute starts one.
            for atr, data in attrs:
                if atr.lower() == 'data-cite':
                    self.citetag = tag
                    self.opentags = 1
                    self.citelist.append([data, self.get_offset()])
                    return
        elif tag == self.citetag:
            # Same tag name nested inside the active citation: track the
            # depth so its closing tag is not mistaken for the citation's.
            self.opentags += 1

    def handle_endtag(self, tag):
        # Ignore closing tags that cannot belong to an active citation.
        # This also keeps stray closing tags from driving the depth negative.
        if self.opentags == 0 or tag != self.citetag:
            return
        if self.opentags == 1:
            pos = self.get_offset()
            # The citation ends right after the '>' of the tag being
            # handled.  Searching for it copes with '</tag >' and with
            # self-closing tags, where this handler runs at the position of
            # the start tag.
            close = self.data.find('>', pos)
            if close == -1:
                end = pos + len(tag) + 3
            else:
                end = close + 1
            self.citelist[-1].append(end)
            # Citation closed: forget the tag name so later plain elements
            # with the same name are not counted as part of a citation.
            self.citetag = None
        self.opentags -= 1

    def feed(self, data):
        # getpos() is cumulative across feed() calls, so keep the whole text.
        self.data += data
        HTMLParser.feed(self, data)