upstream/ipython Commit - r2670:251d7553

1

"""Analysis of text input into executable blocks.

1

"""Analysis of text input into executable blocks.

2

3

This is a simple example of how an interactive terminal-based client can use

3

This is a simple example of how an interactive terminal-based client can use

4

this tool::

4

this tool::

5

6

bb = BlockBreaker()

6

bb = BlockBreaker()

7

while not bb.interactive_block_ready():

7

while not bb.interactive_block_ready():

8

bb.push(raw_input('>>> '))

8

bb.push(raw_input('>>> '))

9

print 'Input source was:\n', bb.source,

9

print 'Input source was:\n', bb.source,

10

"""

10

"""

11

#-----------------------------------------------------------------------------

11

#-----------------------------------------------------------------------------

12

13

#

13

#

14

# Distributed under the terms of the BSD License. The full license is in

14

# Distributed under the terms of the BSD License. The full license is in

15

# the file COPYING, distributed as part of this software.

15

# the file COPYING, distributed as part of this software.

16

#-----------------------------------------------------------------------------

16

#-----------------------------------------------------------------------------

17

18

#-----------------------------------------------------------------------------

18

#-----------------------------------------------------------------------------

19

# Imports

19

# Imports

20

#-----------------------------------------------------------------------------

20

#-----------------------------------------------------------------------------

21

# stdlib

21

# stdlib

22

import codeop

22

import codeop

23

import re

23

import re

24

import sys

24

import sys

25

26

#-----------------------------------------------------------------------------

26

#-----------------------------------------------------------------------------

27

# Utilities

27

# Utilities

28

#-----------------------------------------------------------------------------

28

#-----------------------------------------------------------------------------

29

30

# FIXME: move these utilities to the general ward...

30

# FIXME: move these utilities to the general ward...

31

32

# compiled regexps for autoindent management

32

# compiled regexps for autoindent management

33

dedent_re = re.compile(r'^\s+raise|^\s+return|^\s+pass')

33

dedent_re = re.compile(r'^\s+raise|^\s+return|^\s+pass')

34

ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')

34

ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')

35

36

37

def num_ini_spaces(s):

37

def num_ini_spaces(s):

38

"""Return the number of initial spaces in a string.

38

"""Return the number of initial spaces in a string.

39

40

Note that tabs are counted as a single space. For now, we do *not* support

40

Note that tabs are counted as a single space. For now, we do *not* support

41

mixing of tabs and spaces in the user's input.

41

mixing of tabs and spaces in the user's input.

42

43

Parameters

43

Parameters

44

----------

44

----------

45

s : string

45

s : string

46

"""

46

"""

47

48

ini_spaces = ini_spaces_re.match(s)

48

ini_spaces = ini_spaces_re.match(s)

49

if ini_spaces:

49

if ini_spaces:

50

return ini_spaces.end()

50

return ini_spaces.end()

51

else:

51

else:

52

return 0

52

return 0

53

54

55

def remove_comments(src):

55

def remove_comments(src):

56

"""Remove all comments from input source.

56

"""Remove all comments from input source.

57

58

Note: comments are NOT recognized inside of strings!

58

Note: comments are NOT recognized inside of strings!

59

60

Parameters

60

Parameters

61

----------

61

----------

62

src : string

62

src : string

63

A single or multiline input string.

63

A single or multiline input string.

64

65

Returns

65

Returns

66

-------

66

-------

67

String with all Python comments removed.

67

String with all Python comments removed.

68

"""

68

"""

69

70

return re.sub('#.*', '', src)

70

return re.sub('#.*', '', src)

71

72

73

def get_input_encoding():

73

def get_input_encoding():

74

"""Return the default standard input encoding."""

74

"""Return the default standard input encoding."""

75

return getattr(sys.stdin, 'encoding', 'ascii')

75

76

# There are strange environments for which sys.stdin.encoding is None. We

77

# ensure that a valid encoding is returned.

78

encoding = getattr(sys.stdin, 'encoding', None)

79

if encoding is None:

80

encoding = 'ascii'

81

return encoding

76

82

77

#-----------------------------------------------------------------------------

83

#-----------------------------------------------------------------------------

78

# Classes and functions

84

# Classes and functions

79

#-----------------------------------------------------------------------------

85

#-----------------------------------------------------------------------------

80

86

81

class BlockBreaker(object):

87

class BlockBreaker(object):

82

# Command compiler

88

# Command compiler

83

compile = None

89

compile = None

84

# Number of spaces of indentation

90

# Number of spaces of indentation

85

indent_spaces = 0

91

indent_spaces = 0

86

# String, indicating the default input encoding

92

# String, indicating the default input encoding

87

encoding = ''

93

encoding = ''

88

# String where the current full source input is stored, properly encoded

94

# String where the current full source input is stored, properly encoded

89

source = ''

95

source = ''

90

# Code object corresponding to the current source

96

# Code object corresponding to the current source

91

code = None

97

code = None

92

# Boolean indicating whether the current block is complete

98

# Boolean indicating whether the current block is complete

93

is_complete = None

99

is_complete = None

94

# Input mode

100

# Input mode

95

input_mode = 'append'

101

input_mode = 'append'

96

102

97

# Private attributes

103

# Private attributes

98

104

99

# List

105

# List

100

_buffer = None

106

_buffer = None

101

107

102

def __init__(self, input_mode=None):

108

def __init__(self, input_mode=None):

103

"""Create a new BlockBreaker instance.

109

"""Create a new BlockBreaker instance.

104

110

105

Parameters

111

Parameters

106

----------

112

----------

107

input_mode : str

113

input_mode : str

108

114

109

One of 'append', 'replace', default is 'append'. This controls how

115

One of 'append', 'replace', default is 'append'. This controls how

110

new inputs are used: in 'append' mode, they are appended to the

116

new inputs are used: in 'append' mode, they are appended to the

111

existing buffer and the whole buffer is compiled; in 'replace' mode,

117

existing buffer and the whole buffer is compiled; in 'replace' mode,

112

each new input completely replaces all prior inputs. Replace mode is

118

each new input completely replaces all prior inputs. Replace mode is

113

thus equivalent to prepending a full reset() to every push() call.

119

thus equivalent to prepending a full reset() to every push() call.

114

120

115

In practice, line-oriented clients likely want to use 'append' mode

121

In practice, line-oriented clients likely want to use 'append' mode

116

while block-oriented ones will want to use 'replace'.

122

while block-oriented ones will want to use 'replace'.

117

"""

123

"""

118

self._buffer = []

124

self._buffer = []

119

self.compile = codeop.CommandCompiler()

125

self.compile = codeop.CommandCompiler()

120

self.encoding = get_input_encoding()

126

self.encoding = get_input_encoding()

121

self.input_mode = BlockBreaker.input_mode if input_mode is None \

127

self.input_mode = BlockBreaker.input_mode if input_mode is None \

122

else input_mode

128

else input_mode

123

129

124

def reset(self):

130

def reset(self):

125

"""Reset the input buffer and associated state."""

131

"""Reset the input buffer and associated state."""

126

self.indent_spaces = 0

132

self.indent_spaces = 0

127

self._buffer[:] = []

133

self._buffer[:] = []

128

self.source = ''

134

self.source = ''

129

self.code = None

135

self.code = None

130

136

131

def source_reset(self):

137

def source_reset(self):

132

"""Return the input source and perform a full reset.

138

"""Return the input source and perform a full reset.

133

"""

139

"""

134

out = self.source

140

out = self.source

135

self.reset()

141

self.reset()

136

return out

142

return out

137

143

138

def push(self, lines):

144

def push(self, lines):

139

"""Push one or more lines of input.

145

"""Push one or more lines of input.

140

146

141

This stores the given lines and returns a status code indicating

147

This stores the given lines and returns a status code indicating

142

whether the code forms a complete Python block or not.

148

whether the code forms a complete Python block or not.

143

149

144

Any exceptions generated in compilation are allowed to propagate.

150

Any exceptions generated in compilation are allowed to propagate.

145

151

146

Parameters

152

Parameters

147

----------

153

----------

148

lines : string

154

lines : string

149

One or more lines of Python input.

155

One or more lines of Python input.

150

156

151

Returns

157

Returns

152

-------

158

-------

153

is_complete : boolean

159

is_complete : boolean

154

True if the current input source (the result of the current input

160

True if the current input source (the result of the current input

155

plus prior inputs) forms a complete Python execution block. Note that

161

plus prior inputs) forms a complete Python execution block. Note that

156

this value is also stored as an attribute so it can be queried at any

162

this value is also stored as an attribute so it can be queried at any

157

time.

163

time.

158

"""

164

"""

159

if self.input_mode == 'replace':

165

if self.input_mode == 'replace':

160

self.reset()

166

self.reset()

161

167

162

# If the source code has leading blanks, add 'if 1:\n' to it

168

# If the source code has leading blanks, add 'if 1:\n' to it

163

# this allows execution of indented pasted code. It is tempting

169

# this allows execution of indented pasted code. It is tempting

164

# to add '\n' at the end of source to run commands like ' a=1'

170

# to add '\n' at the end of source to run commands like ' a=1'

165

# directly, but this fails for more complicated scenarios

171

# directly, but this fails for more complicated scenarios

166

if not self._buffer and lines[:1] in [' ', '\t']:

172

if not self._buffer and lines[:1] in [' ', '\t']:

167

lines = 'if 1:\n%s' % lines

173

lines = 'if 1:\n%s' % lines

168

174

169

self._store(lines)

175

self._store(lines)

170

source = self.source

176

source = self.source

171

177

172

# Before calling compile(), reset the code object to None so that if an

178

# Before calling compile(), reset the code object to None so that if an

173

# exception is raised in compilation, we don't mislead by having

179

# exception is raised in compilation, we don't mislead by having

174

# inconsistent code/source attributes.

180

# inconsistent code/source attributes.

175

self.code, self.is_complete = None, None

181

self.code, self.is_complete = None, None

176

try:

182

try:

177

self.code = self.compile(source)

183

self.code = self.compile(source)

178

# Invalid syntax can produce any of a number of different errors from

184

# Invalid syntax can produce any of a number of different errors from

179

# inside the compiler, so we have to catch them all. Syntax errors

185

# inside the compiler, so we have to catch them all. Syntax errors

180

# immediately produce a 'ready' block, so the invalid Python can be

186

# immediately produce a 'ready' block, so the invalid Python can be

181

# sent to the kernel for evaluation with possible ipython

187

# sent to the kernel for evaluation with possible ipython

182

# special-syntax conversion.

188

# special-syntax conversion.

183

except (SyntaxError, OverflowError, ValueError, TypeError, MemoryError):

189

except (SyntaxError, OverflowError, ValueError, TypeError, MemoryError):

184

self.is_complete = True

190

self.is_complete = True

185

else:

191

else:

186

# Compilation didn't produce any exceptions (though it may not have

192

# Compilation didn't produce any exceptions (though it may not have

187

# given a complete code object)

193

# given a complete code object)

188

self.is_complete = self.code is not None

194

self.is_complete = self.code is not None

189

self._update_indent(lines)

195

self._update_indent(lines)

190

196

191

return self.is_complete

197

return self.is_complete

192

198

193

def interactive_block_ready(self):

199

def interactive_block_ready(self):

194

"""Return whether a block of interactive input is ready for execution.

200

"""Return whether a block of interactive input is ready for execution.

195

201

196

This method is meant to be used by line-oriented frontends, who need to

202

This method is meant to be used by line-oriented frontends, who need to

197

guess whether a block is complete or not based solely on prior and

203

guess whether a block is complete or not based solely on prior and

198

current input lines. The BlockBreaker considers it has a complete

204

current input lines. The BlockBreaker considers it has a complete

199

interactive block when *all* of the following are true:

205

interactive block when *all* of the following are true:

200

206

201

1. The input compiles to a complete statement.

207

1. The input compiles to a complete statement.

202

208

203

2. The indentation level is flush-left (because if we are indented,

209

2. The indentation level is flush-left (because if we are indented,

204

like inside a function definition or for loop, we need to keep

210

like inside a function definition or for loop, we need to keep

205

reading new input).

211

reading new input).

206

212

207

3. There is one extra line consisting only of whitespace.

213

3. There is one extra line consisting only of whitespace.

208

214

209

Because of condition #3, this method should be used only by

215

Because of condition #3, this method should be used only by

210

*line-oriented* frontends, since it means that intermediate blank lines

216

*line-oriented* frontends, since it means that intermediate blank lines

211

are not allowed in function definitions (or any other indented block).

217

are not allowed in function definitions (or any other indented block).

212

218

213

Block-oriented frontends that have a separate keyboard event to

219

Block-oriented frontends that have a separate keyboard event to

214

indicate execution should use the :meth:`split_blocks` method instead.

220

indicate execution should use the :meth:`split_blocks` method instead.

215

"""

221

"""

216

if not self.is_complete:

222

if not self.is_complete:

217

return False

223

return False

218

if self.indent_spaces==0:

224

if self.indent_spaces==0:

219

return True

225

return True

220

last_line = self.source.splitlines()[-1]

226

last_line = self.source.splitlines()[-1]

221

if not last_line or last_line.isspace():

227

if not last_line or last_line.isspace():

222

return True

228

return True

223

else:

229

else:

224

return False

230

return False

225

231

226

def split_blocks(self, lines):

232

def split_blocks(self, lines):

227

"""Split a multiline string into multiple input blocks"""

233

"""Split a multiline string into multiple input blocks"""

228

raise NotImplementedError

234

raise NotImplementedError

229

235

230

#------------------------------------------------------------------------

236

#------------------------------------------------------------------------

231

# Private interface

237

# Private interface

232

#------------------------------------------------------------------------

238

#------------------------------------------------------------------------

233

239

234

def _update_indent(self, lines):

240

def _update_indent(self, lines):

235

"""Keep track of the indent level."""

241

"""Keep track of the indent level."""

236

242

237

for line in remove_comments(lines).splitlines():

243

for line in remove_comments(lines).splitlines():

238

244

239

if line and not line.isspace():

245

if line and not line.isspace():

240

if self.code is not None:

246

if self.code is not None:

241

inisp = num_ini_spaces(line)

247

inisp = num_ini_spaces(line)

242

if inisp < self.indent_spaces:

248

if inisp < self.indent_spaces:

243

self.indent_spaces = inisp

249

self.indent_spaces = inisp

244

250

245

if line[-1] == ':':

251

if line[-1] == ':':

246

self.indent_spaces += 4

252

self.indent_spaces += 4

247

elif dedent_re.match(line):

253

elif dedent_re.match(line):

248

self.indent_spaces -= 4

254

self.indent_spaces -= 4

249

255

250

def _store(self, lines):

256

def _store(self, lines):

251

"""Store one or more lines of input.

257

"""Store one or more lines of input.

252

258

253

If input lines are not newline-terminated, a newline is automatically

259

If input lines are not newline-terminated, a newline is automatically

254

appended."""

260

appended."""

255

261

256

if lines.endswith('\n'):

262

if lines.endswith('\n'):

257

self._buffer.append(lines)

263

self._buffer.append(lines)

258

else:

264

else:

259

self._buffer.append(lines+'\n')

265

self._buffer.append(lines+'\n')

260

self.source = ''.join(self._buffer).encode(self.encoding)

266

self.source = ''.join(self._buffer).encode(self.encoding)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             """Analysis of text input into executable blocks.
             This is a simple example of how an interactive terminal-based client can use
             this tool::
                 bb = BlockBreaker()
                 while not bb.interactive_block_ready():
                     bb.push(raw_input('>>> '))
                 print 'Input source was:\n', bb.source,
             """
             #-----------------------------------------------------------------------------
             #  Copyright (C) 2010  The IPython Development Team
             #
             #  Distributed under the terms of the BSD License.  The full license is in
             #  the file COPYING, distributed as part of this software.
             #-----------------------------------------------------------------------------
             #-----------------------------------------------------------------------------
             # Imports
             #-----------------------------------------------------------------------------
             # stdlib
             import codeop
             import re
             import sys
             #-----------------------------------------------------------------------------
             # Utilities
             #-----------------------------------------------------------------------------
             # FIXME: move these utilities to the general ward...
             # Compiled regexps for autoindent management.
             #
             # dedent_re matches an *indented* line that starts with one of the
             # block-terminating keywords raise/return/pass.  The trailing \b makes
             # sure only the whole keyword matches, so identifiers that merely start
             # with one of those words (e.g. 'password', 'returned', 'raised') do not
             # trigger a dedent.
             dedent_re = re.compile(r'^\s+(?:raise|return|pass)\b')
             # ini_spaces_re captures the run of leading blanks (space, tab, CR, FF, VT)
             # at the start of a string; newlines are deliberately not included.
             ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')
             def num_ini_spaces(s):
                 """Count the leading whitespace characters of a string.

                 Every character matched by ``ini_spaces_re`` counts as one, so a tab
                 is counted as a single space.  Mixing tabs and spaces in the user's
                 input is *not* supported for now.

                 Parameters
                 ----------
                 s : string

                 Returns
                 -------
                 Number of leading whitespace characters, 0 when there are none.
                 """
                 leading = ini_spaces_re.match(s)
                 # A failed match means the string does not start with whitespace.
                 return leading.end() if leading else 0
             def remove_comments(src):
                 """Strip Python comments from the input source.

                 Everything from a ``#`` character up to (but not including) the end
                 of its line is dropped.  Note: no parsing is done, so a ``#`` that
                 appears inside a string literal is treated as the start of a comment
                 as well.

                 Parameters
                 ----------
                 src : string
                   A single or multiline input string.

                 Returns
                 -------
                 String with all Python comments removed.
                 """
                 comment_pattern = re.compile('#.*')
                 return comment_pattern.sub('', src)
             def get_input_encoding():
                 """Return the default standard input encoding.

                 Returns
                 -------
                 The ``encoding`` attribute of ``sys.stdin`` when it is set, otherwise
                 ``'ascii'``.
                 """
                 # There are strange environments for which sys.stdin.encoding is None
                 # (or sys.stdin has no such attribute at all).  We ensure that a valid
                 # encoding is returned; an empty string is treated as missing too,
                 # since it is not a usable codec name.
                 encoding = getattr(sys.stdin, 'encoding', None)
                 if not encoding:
                     encoding = 'ascii'
                 return encoding
             #-----------------------------------------------------------------------------
             # Classes and functions
             #-----------------------------------------------------------------------------
             class BlockBreaker(object):
                 """Accumulate pushed input and track whether it forms a complete block.

                 Every call to :meth:`push` stores the new text, recompiles the whole
                 accumulated source with a ``codeop.CommandCompiler`` and records
                 whether the result is a complete statement.  Line-oriented frontends
                 can then poll :meth:`interactive_block_ready`.
                 """
                 # Command compiler (a codeop.CommandCompiler, created in __init__)
                 compile = None
                 # Number of spaces of indentation
                 indent_spaces = 0
                 # String, indicating the default input encoding
                 encoding = ''
                 # String where the current full source input is stored, properly encoded
                 source = ''
                 # Code object corresponding to the current source
                 code = None
                 # Boolean indicating whether the current block is complete; None
                 # before the first push() and again after reset()
                 is_complete = None
                 # Input mode
                 input_mode = 'append'

                 # Private attributes

                 # List of the newline-terminated input chunks pushed so far
                 _buffer = None

                 def __init__(self, input_mode=None):
                     """Create a new BlockBreaker instance.

                     Parameters
                     ----------
                     input_mode : str

                       One of 'append', 'replace', default is 'append'.  This controls how
                       new inputs are used: in 'append' mode, they are appended to the
                       existing buffer and the whole buffer is compiled; in 'replace' mode,
                       each new input completely replaces all prior inputs.  Replace mode is
                       thus equivalent to prepending a full reset() to every push() call.

                       In practice, line-oriented clients likely want to use 'append' mode
                       while block-oriented ones will want to use 'replace'.
                     """
                     self._buffer = []
                     self.compile = codeop.CommandCompiler()
                     self.encoding = get_input_encoding()
                     # Fall back to the class-level default when no mode is given.
                     if input_mode is None:
                         input_mode = BlockBreaker.input_mode
                     self.input_mode = input_mode

                 def reset(self):
                     """Reset the input buffer and associated state."""
                     self.indent_spaces = 0
                     self._buffer[:] = []
                     self.source = ''
                     self.code = None
                     # Also forget the completeness of the previous block; otherwise
                     # interactive_block_ready() would keep reporting True for an
                     # empty buffer right after a reset.
                     self.is_complete = None

                 def source_reset(self):
                     """Return the input source and perform a full reset.
                     """
                     out = self.source
                     self.reset()
                     return out

                 def push(self, lines):
                     """Push one or more lines of input.

                     This stores the given lines and returns a status code indicating
                     whether the code forms a complete Python block or not.

                     Errors raised by the compiler for invalid input (SyntaxError,
                     OverflowError, ValueError, TypeError, MemoryError) are caught here:
                     such input is reported as complete, with ``code`` left as None.

                     Parameters
                     ----------
                     lines : string
                       One or more lines of Python input.

                     Returns
                     -------
                     is_complete : boolean
                       True if the current input source (the result of the current input
                       plus prior inputs) forms a complete Python execution block.  Note that
                       this value is also stored as an attribute so it can be queried at any
                       time.
                     """
                     if self.input_mode == 'replace':
                         self.reset()

                     # If the source code has leading blanks, add 'if 1:\n' to it
                     # this allows execution of indented pasted code. It is tempting
                     # to add '\n' at the end of source to run commands like ' a=1'
                     # directly, but this fails for more complicated scenarios.
                     # (Membership in a tuple, not a string, so that empty input does
                     # not count as starting with a blank.)
                     if not self._buffer and lines[:1] in (' ', '\t'):
                         lines = 'if 1:\n%s' % lines

                     self._store(lines)

                     # Before calling compile(), reset the code object to None so that if an
                     # exception is raised in compilation, we don't mislead by having
                     # inconsistent code/source attributes.
                     self.code, self.is_complete = None, None
                     try:
                         self.code = self.compile(self.source)
                     # Invalid syntax can produce any of a number of different errors from
                     # inside the compiler, so we have to catch them all.  Syntax errors
                     # immediately produce a 'ready' block, so the invalid Python can be
                     # sent to the kernel for evaluation with possible ipython
                     # special-syntax conversion.
                     except (SyntaxError, OverflowError, ValueError, TypeError, MemoryError):
                         self.is_complete = True
                     else:
                         # Compilation didn't produce any exceptions (though it may not have
                         # given a complete code object)
                         self.is_complete = self.code is not None
                         self._update_indent(lines)

                     return self.is_complete

                 def interactive_block_ready(self):
                     """Return whether a block of interactive input is ready for execution.

                     This method is meant to be used by line-oriented frontends, who need to
                     guess whether a block is complete or not based solely on prior and
                     current input lines.  The BlockBreaker considers it has a complete
                     interactive block when the input compiles to a complete statement
                     (condition 1) and at least one of conditions 2 and 3 holds:

                     1. The input compiles to a complete statement.

                     2. The indentation level is flush-left (because if we are indented,
                        like inside a function definition or for loop, we need to keep
                        reading new input).

                     3. The last line of the source is empty or consists only of
                        whitespace.

                     Because of condition #3, this method should be used only by
                     *line-oriented* frontends, since it means that intermediate blank lines
                     are not allowed in function definitions (or any other indented block).

                     Block-oriented frontends that have a separate keyboard event to
                     indicate execution should use the :meth:`split_blocks` method instead.
                     """
                     if not self.is_complete:
                         return False
                     if self.indent_spaces == 0:
                         return True
                     # Still indented: only a trailing blank line terminates the block.
                     last_line = self.source.splitlines()[-1]
                     return (not last_line) or last_line.isspace()

                 def split_blocks(self, lines):
                     """Split a multiline string into multiple input blocks"""
                     raise NotImplementedError

                 #------------------------------------------------------------------------
                 # Private interface
                 #------------------------------------------------------------------------

                 def _update_indent(self, lines):
                     """Keep track of the indent level.

                     Comments are stripped first; blank lines are ignored.  A line whose
                     last non-blank character is ':' adds 4 to ``indent_spaces``, a line
                     matching ``dedent_re`` subtracts 4.  When the accumulated source
                     already compiled to a code object, ``indent_spaces`` is first
                     lowered to the line's own leading-whitespace count if that is
                     smaller.
                     """
                     for line in remove_comments(lines).splitlines():
                         if not line or line.isspace():
                             continue

                         if self.code is not None:
                             inisp = num_ini_spaces(line)
                             if inisp < self.indent_spaces:
                                 self.indent_spaces = inisp

                         # remove_comments() leaves the blanks that preceded a trailing
                         # comment in place, so look at the last non-blank character
                         # rather than at line[-1].
                         if line.rstrip().endswith(':'):
                             self.indent_spaces += 4
                         elif dedent_re.match(line):
                             self.indent_spaces -= 4

                 def _store(self, lines):
                     """Store one or more lines of input.

                     If input lines are not newline-terminated, a newline is automatically
                     appended.  ``source`` is then rebuilt from the whole buffer and
                     encoded with ``self.encoding``."""
                     if not lines.endswith('\n'):
                         lines = lines + '\n'
                     self._buffer.append(lines)
                     self.source = ''.join(self._buffer).encode(self.encoding)