##// END OF EJS Templates
Import HTMLParser from html.parser for Python 3.x
Jonathan Frederic -
Show More
@@ -1,107 +1,111 b''
1 1 """Citation handling for LaTeX output."""
2 2
3 3 #-----------------------------------------------------------------------------
4 4 # Copyright (c) 2013, the IPython Development Team.
5 5 #
6 6 # Distributed under the terms of the Modified BSD License.
7 7 #
8 8 # The full license is in the file COPYING.txt, distributed with this software.
9 9 #-----------------------------------------------------------------------------
10 10
11 11 #-----------------------------------------------------------------------------
12 12 # Imports
13 13 #-----------------------------------------------------------------------------
14 from IPython.utils.py3compat import PY3
15 if PY3:
16 from html.parser import HTMLParser
17 else:
14 18 from HTMLParser import HTMLParser
15 19
16 20 #-----------------------------------------------------------------------------
17 21 # Functions
18 22 #-----------------------------------------------------------------------------
19 23
20 24 __all__ = ['citation2latex']
21 25
22 26
def citation2latex(s):
    """Convert HTML citation elements in Markdown text to LaTeX cites.

    Every HTML element that carries a ``data-cite`` attribute is replaced,
    together with its content, by a LaTeX cite command whose key is the
    attribute value. For example:

    `<cite data-cite="granger">(Granger, 2013)</cite>`

    is turned into

    `\\cite{granger}`

    The tag name is irrelevant, so the citation may be styled in HTML
    in any way. If a citation element is never closed, the text from its
    opening tag to the end of the input is dropped after the cite command.
    """
    parser = CitationParser()
    parser.feed(s)
    parser.close()

    pieces = []
    cursor = 0
    for entry in parser.citelist:
        # entry is [key, start] or [key, start, end] as recorded by the parser
        pieces.append(s[cursor:entry[1]])
        pieces.append('\\cite{%s}' % entry[0])
        if len(entry) == 3:
            cursor = entry[2]
        else:
            # no end offset recorded: nothing after this citation is copied
            cursor = -1
    if cursor != -1:
        pieces.append(s[cursor:])
    return u''.join(pieces)
50 54
51 55 #-----------------------------------------------------------------------------
52 56 # Classes
53 57 #-----------------------------------------------------------------------------
class CitationParser(HTMLParser):
    """Locate HTML elements carrying a ``data-cite`` attribute.

    While parsing, each citation element found outside of another citation
    is recorded in ``citelist`` as ``[key, start]``, where ``key`` is the
    value of the ``data-cite`` attribute and ``start`` is the character
    offset of the opening tag in the fed text. When the matching end tag is
    seen, the offset just past that end tag is appended, giving
    ``[key, start, end]``. An element that is never closed keeps its
    two-item form.

    Offsets are resolved against the text of the most recent ``feed`` call,
    so the whole document should be passed in a single ``feed``.

    Inherits from HTMLParser, overrides:
     - handle_starttag
     - handle_endtag
     - feed
    """
    # nesting depth of tags named like the active citation tag (0 = none open)
    opentags = None
    # list of found citations: [key, start] or [key, start, end]
    citelist = None
    # tag name of the citation currently open, None when no citation is open
    citetag = None

    def __init__(self):
        self.citelist = []
        self.opentags = 0
        self.citetag = None
        # text handed to feed(); get_offset() resolves positions against it
        self.data = ''
        HTMLParser.__init__(self)

    def get_offset(self):
        """Return the parser's current position as an offset into the data.

        ``getpos()`` reports a (line, column) pair; this walks over the
        preceding newlines in ``self.data`` to turn it into a flat index.
        """
        lin, offset = self.getpos()
        pos = 0
        for _ in range(lin - 1):
            pos = self.data.find('\n', pos) + 1
        return pos + offset

    def handle_starttag(self, tag, attrs):
        """Open a citation on a ``data-cite`` tag, or track nesting depth."""
        if self.opentags == 0:
            # no citation is active: look for a data-cite attribute
            for atr, value in attrs:
                if atr.lower() == 'data-cite':
                    self.citetag = tag
                    self.opentags = 1
                    self.citelist.append([value, self.get_offset()])
                    return

        if tag == self.citetag:
            # same tag name nested inside the active citation: remember the
            # depth so its end tag is not mistaken for the citation's own
            self.opentags += 1

    def handle_endtag(self, tag):
        """Close the active citation when its matching end tag is reached."""
        if tag == self.citetag:
            if self.opentags == 1:
                # this end tag closes the citation itself
                pos = self.get_offset()
                # Find the real end of the tag instead of assuming it is
                # spelled exactly '</tag>'; this also covers '</tag >' and
                # self-closing '<tag .../>' elements.
                close = self.data.find('>', pos)
                if close != -1:
                    end = close + 1
                else:
                    end = pos + len(tag) + 3
                self.citelist[-1].append(end)
                # Forget the tag name so later elements with the same name
                # are not counted against this finished citation.
                self.citetag = None
            self.opentags -= 1

    def feed(self, data):
        """Remember ``data`` for offset computation, then parse it."""
        self.data = data
        HTMLParser.feed(self, data)
General Comments 0
You need to be logged in to leave comments. Login now