##// END OF EJS Templates
Made BlockBreaker's input encoding detection more robust to unusual environments (for example, those where sys.stdin.encoding is None).
epatters -
Show More
@@ -1,260 +1,266 b''
1 """Analysis of text input into executable blocks.
1 """Analysis of text input into executable blocks.
2
2
3 This is a simple example of how an interactive terminal-based client can use
3 This is a simple example of how an interactive terminal-based client can use
4 this tool::
4 this tool::
5
5
6 bb = BlockBreaker()
6 bb = BlockBreaker()
7 while not bb.interactive_block_ready():
7 while not bb.interactive_block_ready():
8 bb.push(raw_input('>>> '))
8 bb.push(raw_input('>>> '))
9 print 'Input source was:\n', bb.source,
9 print 'Input source was:\n', bb.source,
10 """
10 """
11 #-----------------------------------------------------------------------------
11 #-----------------------------------------------------------------------------
12 # Copyright (C) 2010 The IPython Development Team
12 # Copyright (C) 2010 The IPython Development Team
13 #
13 #
14 # Distributed under the terms of the BSD License. The full license is in
14 # Distributed under the terms of the BSD License. The full license is in
15 # the file COPYING, distributed as part of this software.
15 # the file COPYING, distributed as part of this software.
16 #-----------------------------------------------------------------------------
16 #-----------------------------------------------------------------------------
17
17
18 #-----------------------------------------------------------------------------
18 #-----------------------------------------------------------------------------
19 # Imports
19 # Imports
20 #-----------------------------------------------------------------------------
20 #-----------------------------------------------------------------------------
21 # stdlib
21 # stdlib
22 import codeop
22 import codeop
23 import re
23 import re
24 import sys
24 import sys
25
25
26 #-----------------------------------------------------------------------------
26 #-----------------------------------------------------------------------------
27 # Utilities
27 # Utilities
28 #-----------------------------------------------------------------------------
28 #-----------------------------------------------------------------------------
29
29
30 # FIXME: move these utilities to the general ward...
30 # FIXME: move these utilities to the general ward...
31
31
# Compiled regexps for autoindent management.

# Matches an indented line whose first token is one of the keywords that ends
# a block (raise / return / pass).  The trailing \b keeps identifiers that
# merely start with a keyword (e.g. ``return_value``) from matching.
dedent_re = re.compile(r'^\s+(?:raise|return|pass)\b')

# Captures the leading run of whitespace characters (newline excluded).
ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')
35
35
36
36
def num_ini_spaces(s):
    """Return the number of initial spaces in a string.

    Note that tabs are counted as a single space.  For now, we do *not* support
    mixing of tabs and spaces in the user's input.

    Parameters
    ----------
    s : string

    Returns
    -------
    n : int
      Length of the leading whitespace run matched by ``ini_spaces_re``, or 0
      when the string does not start with whitespace.
    """
    ini_spaces = ini_spaces_re.match(s)
    return ini_spaces.end() if ini_spaces else 0
53
53
54
54
def remove_comments(src):
    """Remove all comments from input source.

    Note: string literals are NOT recognized, so a '#' that appears inside a
    string is treated as the start of a comment and the rest of that line is
    removed as well.

    Parameters
    ----------
    src : string
      A single or multiline input string.

    Returns
    -------
    String with everything from each '#' to the end of its line removed.  The
    newlines themselves, and any whitespace before the '#', are kept.
    """
    # '.' does not match a newline, so each substitution stops at end of line.
    return re.sub(r'#.*', '', src)
71
71
72
72
def get_input_encoding():
    """Return the default standard input encoding.

    Returns
    -------
    encoding : string
      ``sys.stdin.encoding`` when it is set to a non-empty value, otherwise
      'ascii'.
    """
    # There are strange environments for which sys.stdin.encoding is None (or
    # missing, or empty, or where sys.stdin itself is None).  We ensure that a
    # usable encoding name is always returned.
    return getattr(sys.stdin, 'encoding', None) or 'ascii'
76
82
77 #-----------------------------------------------------------------------------
83 #-----------------------------------------------------------------------------
78 # Classes and functions
84 # Classes and functions
79 #-----------------------------------------------------------------------------
85 #-----------------------------------------------------------------------------
80
86
class BlockBreaker(object):
    """Accumulate lines of Python input and report when they form a block.

    Input is fed in through :meth:`push`; the accumulated text is available as
    ``source`` and, when it compiles, the resulting code object as ``code``.
    """
    # Command compiler
    compile = None
    # Number of spaces of indentation
    indent_spaces = 0
    # String, indicating the default input encoding
    encoding = ''
    # String where the current full source input is stored, properly encoded
    source = ''
    # Code object corresponding to the current source
    code = None
    # Boolean indicating whether the current block is complete
    is_complete = None
    # Input mode
    input_mode = 'append'

    # Private attributes

    # List
    _buffer = None

    def __init__(self, input_mode=None):
        """Create a new BlockBreaker instance.

        Parameters
        ----------
        input_mode : str

          One of 'append', 'replace', default is 'append'.  This controls how
          new inputs are used: in 'append' mode, they are appended to the
          existing buffer and the whole buffer is compiled; in 'replace' mode,
          each new input completely replaces all prior inputs.  Replace mode is
          thus equivalent to prepending a full reset() to every push() call.

          In practice, line-oriented clients likely want to use 'append' mode
          while block-oriented ones will want to use 'replace'.
        """
        self._buffer = []
        self.compile = codeop.CommandCompiler()
        self.encoding = get_input_encoding()
        # Fall back to the class-level default when no mode is given.
        self.input_mode = BlockBreaker.input_mode if input_mode is None \
                          else input_mode

    def reset(self):
        """Reset the input buffer and associated state."""
        self.indent_spaces = 0
        self._buffer[:] = []
        self.source = ''
        self.code = None
        # Also clear the completeness flag; otherwise a stale True left over
        # from the previous block makes interactive_block_ready() report an
        # empty buffer as ready.
        self.is_complete = False

    def source_reset(self):
        """Return the input source and perform a full reset.
        """
        out = self.source
        self.reset()
        return out

    def push(self, lines):
        """Push one or more lines of input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not.

        Only the exception types listed in the ``except`` clause below are
        handled here (the input is then reported as complete); any other
        exception raised during compilation propagates to the caller.

        Parameters
        ----------
        lines : string
          One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
          True if the current input source (the result of the current input
          plus prior inputs) forms a complete Python execution block.  Note that
          this value is also stored as an attribute so it can be queried at any
          time.
        """
        if self.input_mode == 'replace':
            self.reset()

        # If the source code has leading blanks, add 'if 1:\n' to it
        # this allows execution of indented pasted code. It is tempting
        # to add '\n' at the end of source to run commands like ' a=1'
        # directly, but this fails for more complicated scenarios
        if not self._buffer and lines[:1] in (' ', '\t'):
            lines = 'if 1:\n%s' % lines

        self._store(lines)
        source = self.source

        # Before calling compile(), reset the code object to None so that if an
        # exception is raised in compilation, we don't mislead by having
        # inconsistent code/source attributes.
        self.code, self.is_complete = None, None
        try:
            self.code = self.compile(source)
        # Invalid syntax can produce any of a number of different errors from
        # inside the compiler, so we have to catch them all.  Syntax errors
        # immediately produce a 'ready' block, so the invalid Python can be
        # sent to the kernel for evaluation with possible ipython
        # special-syntax conversion.
        except (SyntaxError, OverflowError, ValueError, TypeError, MemoryError):
            self.is_complete = True
        else:
            # Compilation didn't produce any exceptions (though it may not have
            # given a complete code object)
            self.is_complete = self.code is not None
            # NOTE(review): the indent update is assumed to belong to this
            # success branch (indentation was lost in the source) -- confirm.
            self._update_indent(lines)

        return self.is_complete

    def interactive_block_ready(self):
        """Return whether a block of interactive input is ready for execution.

        This method is meant to be used by line-oriented frontends, who need to
        guess whether a block is complete or not based solely on prior and
        current input lines.  The BlockBreaker considers it has a complete
        interactive block when *all* of the following are true:

        1. The input compiles to a complete statement.

        2. The indentation level is flush-left (because if we are indented,
           like inside a function definition or for loop, we need to keep
           reading new input).

        3. There is one extra line consisting only of whitespace.

        Because of condition #3, this method should be used only by
        *line-oriented* frontends, since it means that intermediate blank lines
        are not allowed in function definitions (or any other indented block).

        Block-oriented frontends that have a separate keyboard event to
        indicate execution should use the :meth:`split_blocks` method instead.
        """
        if not self.is_complete:
            return False
        if self.indent_spaces == 0:
            return True
        # Still indented: only a trailing blank line terminates the block.
        last_line = self.source.splitlines()[-1]
        return (not last_line) or last_line.isspace()

    def split_blocks(self, lines):
        """Split a multiline string into multiple input blocks"""
        raise NotImplementedError

    #------------------------------------------------------------------------
    # Private interface
    #------------------------------------------------------------------------

    def _update_indent(self, lines):
        """Keep track of the indent level."""

        for line in remove_comments(lines).splitlines():

            if line and not line.isspace():
                if self.code is not None:
                    # The input compiled to a code object: a line indented
                    # less than the tracked level lowers that level.
                    inisp = num_ini_spaces(line)
                    if inisp < self.indent_spaces:
                        self.indent_spaces = inisp

                # Strip trailing whitespace first: removing a comment such as
                # 'if x:  # note' leaves blanks after the colon.
                if line.rstrip().endswith(':'):
                    self.indent_spaces += 4
                elif dedent_re.match(line):
                    self.indent_spaces -= 4

    def _store(self, lines):
        """Store one or more lines of input.

        If input lines are not newline-terminated, a newline is automatically
        appended."""

        if lines.endswith('\n'):
            self._buffer.append(lines)
        else:
            self._buffer.append(lines+'\n')
        # Rebuild the full source from every stored chunk, in input encoding.
        self.source = ''.join(self._buffer).encode(self.encoding)
General Comments 0
You need to be logged in to leave comments. Login now