upstream/ipython Commit - r15051:c6fe9d02

Replace lxml with HTMLParser in citation2latex

jakobgager -

r15051:c6fe9d02

parent child

IPython/nbconvert/filters/citation.py

0 +74 -39

		@@ -9,9 +9,13
9	9	#-----------------------------------------------------------------------------
10	10
11	11	#-----------------------------------------------------------------------------
12		# Code
	12	# Imports
13	13	#-----------------------------------------------------------------------------
	14	from HTMLParser import HTMLParser
14	15
	16	#-----------------------------------------------------------------------------
	17	# Functions
	18	#-----------------------------------------------------------------------------
15	19
16	20	__all__ = ['citation2latex']
17	21
		@@ -32,41 +36,72 def citation2latex(s):
32	36	Any HTML tag can be used, which allows the citations to be formatted
33	37	in HTML in any manner.
34	38	"""
35		try:
36		from lxml import html
37		except ImportError:
38		return s
39
40		tree = html.fragment_fromstring(s, create_parent='div')
41		_process_node_cite(tree)
42		s = html.tostring(tree, encoding='unicode')
43		if s.endswith('</div>'):
44		s = s[:-6]
45		if s.startswith('<div>'):
46		s = s[5:]
47		return s
48
49
50		def _process_node_cite(node):
51		"""Do the citation replacement as we walk the lxml tree."""
52
53		def _get(o, name):
54		value = getattr(o, name, None)
55		return '' if value is None else value
56
57		if 'data-cite' in node.attrib:
58		cite = '\cite{%(ref)s}' % {'ref': node.attrib['data-cite']}
59		prev = node.getprevious()
60		if prev is not None:
61		prev.tail = _get(prev, 'tail') + cite + _get(node, 'tail')
62		else:
63		parent = node.getparent()
64		if parent is not None:
65		parent.text = _get(parent, 'text') + cite + _get(node, 'tail')
66		try:
67		node.getparent().remove(node)
68		except AttributeError:
69		pass
70		else:
71		for child in node:
72		_process_node_cite(child)
	39	parser = CitationParser()
	40	parser.feed(s)
	41	parser.close()
	42	outtext = u''
	43	startpos = 0
	44	for citation in parser.citelist:
	45	outtext += s[startpos:citation[1]]
	46	outtext += '\\cite{%s}'%citation[0]
	47	startpos = citation[2]
	48	outtext += s[startpos:]
	49	return outtext
	50
	51	#-----------------------------------------------------------------------------
	52	# Classes
	53	#-----------------------------------------------------------------------------
	54	class CitationParser(HTMLParser):
	55	"""Citation Parser
	56
	57	Replaces HTML tags that carry a data-cite attribute with the corresponding LaTeX \\cite.
	58
	59	Inherits from HTMLParser, overrides:
	60	- handle_starttag
	61	- handle_endtag
	62	"""
	63	# number of open tags
	64	opentags = None
	65	# list of found citations
	66	citelist = None
	67	# active citation tag
	68	citetag = None
	69
	70	def __init__(self):
	71	self.citelist = []
	72	self.opentags = 0
	73	HTMLParser.__init__(self)
	74
	75	def get_offset(self):
	76	# Compute start position in source
	77	lin, offset = self.getpos()
	78	pos = 0
	79	for i in range(lin-1):
	80	pos = self.data.find('\n',pos) + 1
	81	return pos + offset
	82
	83	def handle_starttag(self, tag, attrs):
	84	# for each tag check if attributes are present and if no citation is active
	85	if self.opentags == 0 and len(attrs)>0:
	86	for atr, data in attrs:
	87	if atr.lower() == 'data-cite':
	88	self.citetag = tag
	89	self.opentags = 1
	90	self.citelist.append([data, self.get_offset()])
	91	return
	92
	93	if tag == self.citetag:
	94	# found an open citation tag but not the starting one
	95	self.opentags += 1
	96
	97	def handle_endtag(self, tag):
	98	if tag == self.citetag:
	99	# found a citation end tag; check whether it closes the starting one
	100	if self.opentags == 1:
	101	pos = self.get_offset()
	102	self.citelist[-1].append(pos+len(tag)+3)
	103	self.opentags -= 1
	104
	105	def feed(self, data):
	106	self.data = data
	107	HTMLParser.feed(self, data)

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages