upstream/ipython Commit - r12428:6d6f830e

1

# coding: utf-8

1

# coding: utf-8

2

"""String filters.

2

"""String filters.

3

4

Contains a collection of useful string manipulation filters for use in Jinja

4

Contains a collection of useful string manipulation filters for use in Jinja

5

templates.

5

templates.

6

"""

6

"""

7

#-----------------------------------------------------------------------------

7

#-----------------------------------------------------------------------------

8

9

#

9

#

10

# Distributed under the terms of the Modified BSD License.

10

# Distributed under the terms of the Modified BSD License.

11

#

11

#

12

# The full license is in the file COPYING.txt, distributed with this software.

12

# The full license is in the file COPYING.txt, distributed with this software.

13

#-----------------------------------------------------------------------------

13

#-----------------------------------------------------------------------------

14

15

#-----------------------------------------------------------------------------

15

#-----------------------------------------------------------------------------

16

# Imports

16

# Imports

17

#-----------------------------------------------------------------------------

17

#-----------------------------------------------------------------------------

18

19

import os

19

import os

20

import re

20

import re

21

import textwrap

21

import textwrap

22

from xml.etree import ElementTree

22

from xml.etree import ElementTree

23

24

from IPython.core.interactiveshell import InteractiveShell

24

from IPython.core.interactiveshell import InteractiveShell

25

from IPython.utils import py3compat

25

from IPython.utils import py3compat

26

27

#-----------------------------------------------------------------------------

27

#-----------------------------------------------------------------------------

28

# Functions

28

# Functions

29

#-----------------------------------------------------------------------------

29

#-----------------------------------------------------------------------------

30

31

__all__ = [

31

__all__ = [

32

'wrap_text',

32

'wrap_text',

33

'html2text',

33

'html2text',

34

'add_anchor',

34

'add_anchor',

35

'strip_dollars',

35

'strip_dollars',

36

'strip_files_prefix',

36

'strip_files_prefix',

37

'comment_lines',

37

'comment_lines',

38

'get_lines',

38

'get_lines',

39

'ipython2python',

39

'ipython2python',

40

'posix_path',

40

'posix_path',

41

]

41

]

42

43

44

def wrap_text(text, width=100):

44

def wrap_text(text, width=100):

45

"""

45

"""

46

Intelligently wrap text.

46

Intelligently wrap text.

47

Wrap text without breaking words if possible.

47

Wrap text without breaking words if possible.

48

49

Parameters

49

Parameters

50

----------

50

----------

51

text : str

51

text : str

52

Text to wrap.

52

Text to wrap.

53

width : int, optional

53

width : int, optional

54

Number of characters to wrap to, default 100.

54

Number of characters to wrap to, default 100.

55

"""

55

"""

56

57

split_text = text.split('\n')

57

split_text = text.split('\n')

58

wrp = map(lambda x:textwrap.wrap(x,width), split_text)

58

wrp = map(lambda x:textwrap.wrap(x,width), split_text)

59

wrpd = map('\n'.join, wrp)

59

wrpd = map('\n'.join, wrp)

60

return '\n'.join(wrpd)

60

return '\n'.join(wrpd)

61

62

63

def html2text(element):

63

def html2text(element):

64

"""extract inner text from html

64

"""extract inner text from html

65

66

Analog of jQuery's $(element).text()

66

Analog of jQuery's $(element).text()

67

"""

67

"""

68

if isinstance(element, py3compat.string_types):

68

if isinstance(element, py3compat.string_types):

69

element = ElementTree.fromstring(element)

69

element = ElementTree.fromstring(element)

70

71

text = element.text or ""

71

text = element.text or ""

72

for child in element:

72

for child in element:

73

text += html2text(child)

73

text += html2text(child)

74

text += (element.tail or "")

74

text += (element.tail or "")

75

return text

75

return text

76

77

78

def add_anchor(html):

78

def add_anchor(html):

79

"""Add an anchor-link to an html header tag

79

"""Add an anchor-link to an html header tag

80

81

For use in heading cells

81

For use in heading cells

82

"""

82

"""

83

h = ElementTree.fromstring(py3compat.cast_bytes_py2(html))

83

h = ElementTree.fromstring(py3compat.cast_bytes_py2(html, encoding='utf-8'))

84

link = html2text(h).replace(' ', '-')

84

link = html2text(h).replace(' ', '-')

85

h.set('id', link)

85

h.set('id', link)

86

a = ElementTree.Element("a", {"class" : "anchor-link", "href" : "#" + link})

86

a = ElementTree.Element("a", {"class" : "anchor-link", "href" : "#" + link})

87

a.text = u'¶'

87

a.text = u'¶'

88

h.append(a)

88

h.append(a)

89

90

# Known issue of Python3.x, ElementTree.tostring() returns a byte string

90

# Known issue of Python3.x, ElementTree.tostring() returns a byte string

91

# instead of a text string. See issue http://bugs.python.org/issue10942

91

# instead of a text string. See issue http://bugs.python.org/issue10942

92

# Workaround is to make sure the bytes are casted to a string.

92

# Workaround is to make sure the bytes are casted to a string.

93

return py3compat.decode(ElementTree.tostring(h), 'utf-8')

93

return py3compat.decode(ElementTree.tostring(h), 'utf-8')

94

95

96

def strip_dollars(text):

96

def strip_dollars(text):

97

"""

97

"""

98

Remove all dollar symbols from text

98

Remove all dollar symbols from text

99

100

Parameters

100

Parameters

101

----------

101

----------

102

text : str

102

text : str

103

Text to remove dollars from

103

Text to remove dollars from

104

"""

104

"""

105

106

return text.strip('$')

106

return text.strip('$')

107

108

109

files_url_pattern = re.compile(r'(src|href)\=([\'"]?)files/')

109

files_url_pattern = re.compile(r'(src|href)\=([\'"]?)files/')

110

111

def strip_files_prefix(text):

111

def strip_files_prefix(text):

112

"""

112

"""

113

Fix all fake URLs that start with `files/`,

113

Fix all fake URLs that start with `files/`,

114

stripping out the `files/` prefix.

114

stripping out the `files/` prefix.

115

116

Parameters

116

Parameters

117

----------

117

----------

118

text : str

118

text : str

119

Text in which to replace 'src="files/real...' with 'src="real...'

119

Text in which to replace 'src="files/real...' with 'src="real...'

120

"""

120

"""

121

return files_url_pattern.sub(r"\1=\2", text)

121

return files_url_pattern.sub(r"\1=\2", text)

122

123

124

def comment_lines(text, prefix='# '):

124

def comment_lines(text, prefix='# '):

125

"""

125

"""

126

Build a Python comment line from input text.

126

Build a Python comment line from input text.

127

128

Parameters

128

Parameters

129

----------

129

----------

130

text : str

130

text : str

131

Text to comment out.

131

Text to comment out.

132

prefix : str

132

prefix : str

133

Character to append to the start of each line.

133

Character to append to the start of each line.

134

"""

134

"""

135

136

#Replace line breaks with line breaks and comment symbols.

136

#Replace line breaks with line breaks and comment symbols.

137

#Also add a comment symbol at the beginning to comment out

137

#Also add a comment symbol at the beginning to comment out

138

#the first line.

138

#the first line.

139

return prefix + ('\n'+prefix).join(text.split('\n'))

139

return prefix + ('\n'+prefix).join(text.split('\n'))

140

141

142

def get_lines(text, start=None,end=None):

142

def get_lines(text, start=None,end=None):

143

"""

143

"""

144

Split the input text into separate lines and then return the

144

Split the input text into separate lines and then return the

145

lines that the caller is interested in.

145

lines that the caller is interested in.

146

147

Parameters

147

Parameters

148

----------

148

----------

149

text : str

149

text : str

150

Text to parse lines from.

150

Text to parse lines from.

151

start : int, optional

151

start : int, optional

152

First line to grab from.

152

First line to grab from.

153

end : int, optional

153

end : int, optional

154

Last line to grab from.

154

Last line to grab from.

155

"""

155

"""

156

157

# Split the input into lines.

157

# Split the input into lines.

158

lines = text.split("\n")

158

lines = text.split("\n")

159

160

# Return the right lines.

160

# Return the right lines.

161

return "\n".join(lines[start:end]) #re-join

161

return "\n".join(lines[start:end]) #re-join

162

163

def ipython2python(code):

163

def ipython2python(code):

164

"""Transform IPython syntax to pure Python syntax

164

"""Transform IPython syntax to pure Python syntax

165

166

Parameters

166

Parameters

167

----------

167

----------

168

169

code : str

169

code : str

170

IPython code, to be transformed to pure Python

170

IPython code, to be transformed to pure Python

171

"""

171

"""

172

shell = InteractiveShell.instance()

172

shell = InteractiveShell.instance()

173

return shell.input_transformer_manager.transform_cell(code)

173

return shell.input_transformer_manager.transform_cell(code)

174

175

def posix_path(path):

175

def posix_path(path):

176

"""Turn a path into posix-style path/to/etc

176

"""Turn a path into posix-style path/to/etc

177

178

Mainly for use in latex on Windows,

178

Mainly for use in latex on Windows,

179

where native Windows paths are not allowed.

179

where native Windows paths are not allowed.

180

"""

180

"""

181

if os.path.sep != '/':

181

if os.path.sep != '/':

182

return path.replace(os.path.sep, '/')

182

return path.replace(os.path.sep, '/')

183

return path

183

return path

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # coding: utf-8
             """String filters.
             Contains a collection of useful string manipulation filters for use in Jinja
             templates.
             """
             #-----------------------------------------------------------------------------
             # Copyright (c) 2013, the IPython Development Team.
             #
             # Distributed under the terms of the Modified BSD License.
             #
             # The full license is in the file COPYING.txt, distributed with this software.
             #-----------------------------------------------------------------------------
             #-----------------------------------------------------------------------------
             # Imports
             #-----------------------------------------------------------------------------
             import os
             import re
             import textwrap
             from xml.etree import ElementTree
             from IPython.core.interactiveshell import InteractiveShell
             from IPython.utils import py3compat
             #-----------------------------------------------------------------------------
             # Functions
             #-----------------------------------------------------------------------------
             # Names exported by a star-import of this module: the string filters
             # defined below.
             __all__ = [
                 'wrap_text',
                 'html2text',
                 'add_anchor',
                 'strip_dollars',
                 'strip_files_prefix',
                 'comment_lines',
                 'get_lines',
                 'ipython2python',
                 'posix_path',
             ]
             def wrap_text(text, width=100):
                 """
                 Wrap text to a maximum width while keeping existing line breaks.

                 Every newline-separated line of ``text`` is wrapped on its own with
                 ``textwrap.wrap`` and the results are joined back with newlines.

                 Parameters
                 ----------
                 text : str
                     Text to wrap.
                 width : int, optional
                     Number of characters to wrap to, default 100.
                 """
                 wrapped_lines = []
                 for line in text.split('\n'):
                     # textwrap.wrap yields an empty list for a blank line; joining
                     # that gives '' so the blank line survives in the output.
                     wrapped_lines.append('\n'.join(textwrap.wrap(line, width)))
                 return '\n'.join(wrapped_lines)
             def html2text(element):
                 """Return the concatenated inner text of an html element.

                 Analog of jQuery's $(element).text()

                 ``element`` may be a string, in which case it is first parsed with
                 ``ElementTree.fromstring``, or an already parsed element.  The
                 result is the element's own text, followed by the text of each
                 child (recursively), followed by the element's tail text.
                 """
                 if isinstance(element, py3compat.string_types):
                     element = ElementTree.fromstring(element)

                 # Collect the fragments in document order, then join once.
                 fragments = [element.text or ""]
                 fragments.extend(html2text(child) for child in element)
                 fragments.append(element.tail or "")
                 return "".join(fragments)
             def add_anchor(html):
                 """Add an anchor-link to an html header tag

                 For use in heading cells

                 The header is parsed with ``ElementTree``, given an ``id`` derived
                 from its inner text (spaces replaced by hyphens), and an
                 ``<a class="anchor-link">`` child pointing at that id is appended.

                 Parameters
                 ----------
                 html : str
                     Markup of a single header element; it must be parseable by
                     ``ElementTree.fromstring``.

                 Returns
                 -------
                 str
                     The serialized header element including the new anchor.
                 """
                 # Pass the encoding explicitly so the cast does not depend on a
                 # default (presumably only relevant on Python 2 -- see py3compat).
                 h = ElementTree.fromstring(py3compat.cast_bytes_py2(html, encoding='utf-8'))
                 link = html2text(h).replace(' ', '-')
                 h.set('id', link)
                 a = ElementTree.Element("a", {"class" : "anchor-link", "href" : "#" + link})
                 a.text = u'¶'
                 h.append(a)
                 # Known issue of Python3.x, ElementTree.tostring() returns a byte string
                 # instead of a text string.  See issue http://bugs.python.org/issue10942
                 # Workaround is to make sure the bytes are casted to a string.
                 return py3compat.decode(ElementTree.tostring(h), 'utf-8')
             def strip_dollars(text):
                 """
                 Strip leading and trailing dollar symbols from text.

                 Dollar symbols in the interior of the text are left untouched.

                 Parameters
                 ----------
                 text : str
                     Text to remove the surrounding dollars from
                 """
                 without_leading = text.lstrip('$')
                 return without_leading.rstrip('$')
             # Matches a src/href attribute (optionally quoted) whose value starts
             # with the fake ``files/`` prefix.
             files_url_pattern = re.compile(r'(src|href)\=([\'"]?)files/')
             def strip_files_prefix(text):
                 """
                 Drop the `files/` prefix from fake URLs in src/href attributes.

                 Parameters
                 ----------
                 text : str
                     Text in which to replace 'src="files/real...' with 'src="real...'
                 """
                 def _without_prefix(match):
                     # Re-emit the attribute name and the opening quote (if any),
                     # leaving out the matched ``files/`` part.
                     attribute, quote = match.group(1), match.group(2)
                     return attribute + "=" + quote

                 return files_url_pattern.sub(_without_prefix, text)
             def comment_lines(text, prefix='# '):
                 """
                 Turn input text into comment lines.

                 Parameters
                 ----------
                 text : str
                     Text to comment out.
                 prefix : str
                     String to prepend to the start of each line.
                 """
                 # str.split always yields at least one item, so even empty input
                 # produces a single line consisting of just the prefix.
                 commented = [prefix + line for line in text.split('\n')]
                 return '\n'.join(commented)
             def get_lines(text, start=None,end=None):
                 """
                 Return a contiguous range of lines from the input text.

                 The text is split on newlines, sliced as ``lines[start:end]`` and
                 joined back with newlines.

                 Parameters
                 ----------
                 text : str
                     Text to parse lines from.
                 start : int, optional
                     Index of the first line to keep (slice start).
                 end : int, optional
                     Index at which to stop (slice end, not included).
                 """
                 wanted = slice(start, end)
                 all_lines = text.split("\n")
                 return "\n".join(all_lines[wanted])
             def ipython2python(code):
                 """Transform IPython syntax to pure Python syntax

                 The cell is run through the input transformer manager of the
                 shared ``InteractiveShell`` instance.

                 Parameters
                 ----------
                 code : str
                     IPython code, to be transformed to pure Python
                 """
                 transformer = InteractiveShell.instance().input_transformer_manager
                 return transformer.transform_cell(code)
             def posix_path(path):
                 """Turn a path into posix-style path/to/etc

                 Mainly for use in latex on Windows,
                 where native Windows paths are not allowed.

                 When the platform separator is already '/', the path is returned
                 unchanged.
                 """
                 native_sep = os.path.sep
                 return path if native_sep == '/' else path.replace(native_sep, '/')