##// END OF EJS Templates
Replace lxml with HTMLParser in citation2latex
jakobgager -
Show More
@@ -1,72 +1,107
1 """Citation handling for LaTeX output."""
1 """Citation handling for LaTeX output."""
2
2
3 #-----------------------------------------------------------------------------
3 #-----------------------------------------------------------------------------
4 # Copyright (c) 2013, the IPython Development Team.
4 # Copyright (c) 2013, the IPython Development Team.
5 #
5 #
6 # Distributed under the terms of the Modified BSD License.
6 # Distributed under the terms of the Modified BSD License.
7 #
7 #
8 # The full license is in the file COPYING.txt, distributed with this software.
8 # The full license is in the file COPYING.txt, distributed with this software.
9 #-----------------------------------------------------------------------------
9 #-----------------------------------------------------------------------------
10
10
11 #-----------------------------------------------------------------------------
11 #-----------------------------------------------------------------------------
12 # Code
12 # Imports
13 #-----------------------------------------------------------------------------
13 #-----------------------------------------------------------------------------
14 from HTMLParser import HTMLParser
14
15
16 #-----------------------------------------------------------------------------
17 # Functions
18 #-----------------------------------------------------------------------------
15
19
16 __all__ = ['citation2latex']
20 __all__ = ['citation2latex']
17
21
18
22
19 def citation2latex(s):
23 def citation2latex(s):
20 """Parse citations in Markdown cells.
24 """Parse citations in Markdown cells.
21
25
22 This looks for HTML tags having a data attribute named `data-cite`
26 This looks for HTML tags having a data attribute named `data-cite`
23 and replaces it by the call to LaTeX cite command. The transformation
27 and replaces it by the call to LaTeX cite command. The transformation
24 looks like this:
28 looks like this:
25
29
26 `<cite data-cite="granger">(Granger, 2013)</cite>`
30 `<cite data-cite="granger">(Granger, 2013)</cite>`
27
31
28 Becomes
32 Becomes
29
33
30 `\\cite{granger}`
34 `\\cite{granger}`
31
35
32 Any HTML tag can be used, which allows the citations to be formatted
36 Any HTML tag can be used, which allows the citations to be formatted
33 in HTML in any manner.
37 in HTML in any manner.
34 """
38 """
35 try:
39 parser = CitationParser()
36 from lxml import html
40 parser.feed(s)
37 except ImportError:
41 parser.close()
38 return s
42 outtext = u''
39
43 startpos = 0
40 tree = html.fragment_fromstring(s, create_parent='div')
44 for citation in parser.citelist:
41 _process_node_cite(tree)
45 outtext += s[startpos:citation[1]]
42 s = html.tostring(tree, encoding='unicode')
46 outtext += '\\cite{%s}'%citation[0]
43 if s.endswith('</div>'):
47 startpos = citation[2]
44 s = s[:-6]
48 outtext += s[startpos:]
45 if s.startswith('<div>'):
49 return outtext
46 s = s[5:]
50
47 return s
51 #-----------------------------------------------------------------------------
48
52 # Classes
49
53 #-----------------------------------------------------------------------------
50 def _process_node_cite(node):
54 class CitationParser(HTMLParser):
51 """Do the citation replacement as we walk the lxml tree."""
55 """Citation Parser
52
56
53 def _get(o, name):
57 Replaces html tags with data-cite attribute with respective latex \\cite.
54 value = getattr(o, name, None)
58
55 return '' if value is None else value
59 Inherits from HTMLParser, overrides:
56
60 - handle_starttag
57 if 'data-cite' in node.attrib:
61 - handle_endtag
58 cite = '\cite{%(ref)s}' % {'ref': node.attrib['data-cite']}
62 """
59 prev = node.getprevious()
63 # number of open tags
60 if prev is not None:
64 opentags = None
61 prev.tail = _get(prev, 'tail') + cite + _get(node, 'tail')
65 # list of found citations
62 else:
66 citelist = None
63 parent = node.getparent()
67 # active citation tag
64 if parent is not None:
68 citetag = None
65 parent.text = _get(parent, 'text') + cite + _get(node, 'tail')
69
66 try:
70 def __init__(self):
67 node.getparent().remove(node)
71 self.citelist = []
68 except AttributeError:
72 self.opentags = 0
69 pass
73 HTMLParser.__init__(self)
70 else:
74
71 for child in node:
75 def get_offset(self):
72 _process_node_cite(child)
76 # Compute startposition in source
77 lin, offset = self.getpos()
78 pos = 0
79 for i in range(lin-1):
80 pos = self.data.find('\n',pos) + 1
81 return pos + offset
82
def handle_starttag(self, tag, attrs):
    """Record where a citation element starts and count nested tags.

    While no citation is active, the first tag carrying a ``data-cite``
    attribute opens one: its tag name is remembered in ``citetag`` and a
    new ``[key, start_offset]`` entry is appended to ``citelist``.
    While a citation is active, further start tags with the same name
    only increase ``opentags`` so the matching end tag can be found.

    Parameters
    ----------
    tag : str
        Name of the tag being opened.
    attrs : list of (name, value) pairs
        Attributes of the tag.
    """
    if self.opentags <= 0:
        # No citation is active, so this tag can only matter if it starts one.
        for name, value in attrs:
            if name.lower() == 'data-cite':
                self.citetag = tag
                self.opentags = 1
                self.citelist.append([value, self.get_offset()])
                break
        # Return without comparing against ``citetag``: it may still hold
        # the name of an already closed citation, and counting such a tag
        # would mark a citation as open when none is.
        return

    if tag == self.citetag:
        # Same tag name nested inside the active citation.
        self.opentags += 1
96
def handle_endtag(self, tag):
    """Close the active citation when its matching end tag is reached.

    Only end tags with the same name as the tag that opened the citation
    are counted. When the outermost one closes, the offset just past its
    closing ``>`` is appended to the current ``citelist`` entry, which
    then reads ``[key, start_offset, end_offset]``.

    Parameters
    ----------
    tag : str
        Name of the tag being closed.
    """
    # Ignore unrelated end tags and stray ones seen while no citation is
    # open, so the counter can never become negative.
    if self.opentags <= 0 or tag != self.citetag:
        return

    self.opentags -= 1
    if self.opentags > 0:
        # A nested tag of the same name closed; the citation is still open.
        return

    start = self.get_offset()
    # Search for the real '>' instead of assuming the literal "</tag>":
    # this also covers "</tag >" and self-closing tags.
    # NOTE(review): a '>' inside an attribute value of a self-closing
    # citation tag would end the span early -- confirm this is acceptable.
    closing = self.data.find('>', start)
    if closing < 0:
        end = start + len(tag) + 3
    else:
        end = closing + 1
    self.citelist[-1].append(end)
    # Forget the tag name so later tags of the same kind are not counted.
    self.citetag = None
104
def feed(self, data):
    """Feed text to the parser while keeping a copy for offset lookups.

    ``get_offset`` turns the (line, column) pair from ``getpos()`` into
    an index into ``self.data``. ``getpos()`` counts over everything fed
    so far, so ``self.data`` has to hold the whole stream, not only the
    latest chunk.

    Parameters
    ----------
    data : str
        Next piece of markup to parse.
    """
    if self.getpos() == (1, 0) and not self.rawdata:
        # Fresh or freshly reset parser: start a new buffer.
        self.data = data
    else:
        # Continuation of an earlier feed(): keep offsets stream-relative.
        self.data += data
    HTMLParser.feed(self, data)
General Comments 0
You need to be logged in to leave comments. Login now