##// END OF EJS Templates
Import HTMLParser from html.parser for Python 3.x
Jonathan Frederic -
Show More
@@ -1,107 +1,111 b''
1 """Citation handling for LaTeX output."""
1 """Citation handling for LaTeX output."""
2
2
3 #-----------------------------------------------------------------------------
3 #-----------------------------------------------------------------------------
4 # Copyright (c) 2013, the IPython Development Team.
4 # Copyright (c) 2013, the IPython Development Team.
5 #
5 #
6 # Distributed under the terms of the Modified BSD License.
6 # Distributed under the terms of the Modified BSD License.
7 #
7 #
8 # The full license is in the file COPYING.txt, distributed with this software.
8 # The full license is in the file COPYING.txt, distributed with this software.
9 #-----------------------------------------------------------------------------
9 #-----------------------------------------------------------------------------
10
10
11 #-----------------------------------------------------------------------------
11 #-----------------------------------------------------------------------------
12 # Imports
12 # Imports
13 #-----------------------------------------------------------------------------
13 #-----------------------------------------------------------------------------
14 from IPython.utils.py3compat import PY3
15 if PY3:
16 from html.parser import HTMLParser
17 else:
14 from HTMLParser import HTMLParser
18 from HTMLParser import HTMLParser
15
19
16 #-----------------------------------------------------------------------------
20 #-----------------------------------------------------------------------------
17 # Functions
21 # Functions
18 #-----------------------------------------------------------------------------
22 #-----------------------------------------------------------------------------
19
23
20 __all__ = ['citation2latex']
24 __all__ = ['citation2latex']
21
25
22
26
def citation2latex(s):
    """Parse citations in Markdown cells.

    This looks for HTML tags having a data attribute named `data-cite`
    and replaces each of them by a call to the LaTeX cite command. The
    transformation looks like this:

    `<cite data-cite="granger">(Granger, 2013)</cite>`

    Becomes

    `\\cite{granger}`

    Any HTML tag can be used, which allows the citations to be formatted
    in HTML in any manner.

    Parameters
    ----------
    s : str
        Source text that may contain citation tags.

    Returns
    -------
    unicode text
        `s` with every citation element (opening tag, content and closing
        tag) replaced by `\\cite{<key>}`. If a citation tag is never
        closed, everything following its opening tag is dropped.
    """
    parser = CitationParser()
    parser.feed(s)
    parser.close()

    # Collect the pieces and join once instead of growing a string with +=.
    pieces = []
    # Index in `s` where the not-yet-copied text starts; None once an
    # unclosed citation has swallowed the remainder of the input.
    startpos = 0
    for citation in parser.citelist:
        # Each entry is [key, start_offset] plus an end offset once the
        # citation tag has been closed.
        pieces.append(s[startpos:citation[1]])
        pieces.append('\\cite{%s}' % citation[0])
        if len(citation) < 3:
            # No end offset recorded: the tag was never closed, so the
            # rest of the text is treated as citation content.
            startpos = None
            break
        # Use the first recorded end offset; tolerating extra elements
        # avoids silently truncating the output on a malformed entry.
        startpos = citation[2]
    if startpos is not None:
        pieces.append(s[startpos:])
    return u''.join(pieces)
50
54
51 #-----------------------------------------------------------------------------
55 #-----------------------------------------------------------------------------
52 # Classes
56 # Classes
53 #-----------------------------------------------------------------------------
57 #-----------------------------------------------------------------------------
class CitationParser(HTMLParser):
    """Citation Parser

    Locates html tags carrying a data-cite attribute so that they can be
    replaced with the respective latex \\cite.

    After parsing, `citelist` holds one entry per citation, of the form
    `[key, start_offset, end_offset]`, where the offsets delimit the whole
    element (opening tag through closing tag) in the fed text. A citation
    whose tag is never closed has no `end_offset`.

    The offsets are computed against the text passed to the most recent
    `feed` call, so the whole document must be fed in a single call.

    Inherits from HTMLParser, overrides:
     - handle_starttag
     - handle_startendtag
     - handle_endtag
     - feed
    """
    # number of open tags
    opentags = None
    # list of found citations
    citelist = None
    # active citation tag
    citetag = None

    def __init__(self):
        self.citelist = []
        self.opentags = 0
        HTMLParser.__init__(self)

    def get_offset(self):
        """Return the current parser position as an index into `self.data`."""
        # getpos() reports (1-based line, 0-based column); walk over the
        # preceding newlines to turn that into an absolute offset.
        lineno, column = self.getpos()
        pos = 0
        for _ in range(lineno - 1):
            pos = self.data.find('\n', pos) + 1
        return pos + column

    def handle_starttag(self, tag, attrs):
        """Start a citation on a data-cite tag, or track nesting depth."""
        if self.opentags == 0:
            # No citation is active: look for a data-cite attribute.
            for name, value in attrs:
                if name.lower() == 'data-cite':
                    self.citetag = tag
                    self.opentags = 1
                    self.citelist.append([value, self.get_offset()])
                    return
            # Tags outside a citation must not touch the nesting counter,
            # even if they share their name with an earlier citation tag.
            return

        if tag == self.citetag:
            # found an open citation tag but not the starting one
            self.opentags += 1

    def handle_startendtag(self, tag, attrs):
        """Handle self-closing tags such as `<cite data-cite="key"/>`."""
        self.handle_starttag(tag, attrs)
        # The parser position still points at the start of the tag, so the
        # element ends where the start tag text ends.
        self._close_tag(tag, self.get_offset() + len(self.get_starttag_text()))

    def handle_endtag(self, tag):
        """Close the active citation when its matching end tag is reached."""
        start = self.get_offset()
        # Locate the real end of the closing tag instead of assuming the
        # exact form `</tag>`; this also covers e.g. `</tag >`.
        end = self.data.find('>', start) + 1
        if end == 0:
            # No '>' found in the stored text: fall back to the `</tag>` form.
            end = start + len(tag) + 3
        self._close_tag(tag, end)

    def _close_tag(self, tag, endpos):
        """Register one closing of `tag`; `endpos` is the offset just past it."""
        if self.opentags == 0 or tag != self.citetag:
            # Not inside a citation, or an unrelated tag: nothing to do.
            return
        self.opentags -= 1
        if self.opentags == 0:
            # The outermost citation tag was closed: record where it ends
            # and forget the tag name so later tags with the same name are
            # not mistaken for part of this citation.
            self.citelist[-1].append(endpos)
            self.citetag = None

    def feed(self, data):
        """Remember `data` for offset computation, then parse it."""
        self.data = data
        HTMLParser.feed(self, data)
General Comments 0
You need to be logged in to leave comments. Login now