##// END OF EJS Templates
Made blockbreakers' input encoding detection more robust to strange environments.
epatters -
Show More
@@ -1,260 +1,266 b''
1 1 """Analysis of text input into executable blocks.
2 2
3 3 This is a simple example of how an interactive terminal-based client can use
4 4 this tool::
5 5
6 6 bb = BlockBreaker()
7 7 while not bb.interactive_block_ready():
8 8 bb.push(raw_input('>>> '))
9 9 print 'Input source was:\n', bb.source,
10 10 """
11 11 #-----------------------------------------------------------------------------
12 12 # Copyright (C) 2010 The IPython Development Team
13 13 #
14 14 # Distributed under the terms of the BSD License. The full license is in
15 15 # the file COPYING, distributed as part of this software.
16 16 #-----------------------------------------------------------------------------
17 17
18 18 #-----------------------------------------------------------------------------
19 19 # Imports
20 20 #-----------------------------------------------------------------------------
21 21 # stdlib
22 22 import codeop
23 23 import re
24 24 import sys
25 25
26 26 #-----------------------------------------------------------------------------
27 27 # Utilities
28 28 #-----------------------------------------------------------------------------
29 29
30 30 # FIXME: move these utilities to the general ward...
31 31
# Compiled regexps for autoindent management.
# dedent_re matches a line whose leading whitespace is immediately followed by
# ``raise``, ``return`` or ``pass``. ini_spaces_re captures a line's leading
# run of spaces, tabs, carriage returns, form feeds and vertical tabs
# (newlines are not part of the set).
dedent_re = re.compile(r'^\s+raise|^\s+return|^\s+pass')
ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')
35 35
36 36
def num_ini_spaces(s):
    """Count the whitespace characters that lead a string.

    Every tab counts as one character, exactly like a space. For now, input
    that mixes tabs and spaces is *not* supported.

    Parameters
    ----------
    s : string

    Returns
    -------
    Integer length of the leading run of space, tab, carriage-return,
    form-feed and vertical-tab characters. The result is 0 when the string is
    empty or starts with anything else (a newline included).
    """
    # Same character set as the module-level ``ini_spaces_re`` pattern; the
    # length lost by stripping it from the left is the size of the leading run.
    return len(s) - len(s.lstrip(' \t\r\f\v'))
53 53
54 54
def remove_comments(src):
    """Strip Python comments out of a piece of source text.

    Everything from a ``#`` up to (but not including) the end of its line is
    deleted. The scan is purely textual: a ``#`` that sits inside a string
    literal is treated as the start of a comment as well.

    Parameters
    ----------
    src : string
        A single or multiline input string.

    Returns
    -------
    String with all Python comments removed.
    """
    comment_pattern = re.compile('#.*')
    return comment_pattern.sub('', src)
71 71
72 72
def get_input_encoding():
    """Return the default standard input encoding.

    Returns
    -------
    The ``encoding`` attribute of ``sys.stdin`` when it holds a usable value,
    otherwise 'ascii'.
    """
    # There are strange environments for which sys.stdin.encoding is None (or
    # the attribute is missing altogether). We ensure that a valid encoding is
    # returned; ``or`` additionally covers an empty-string encoding.
    encoding = getattr(sys.stdin, 'encoding', None)
    return encoding or 'ascii'
76 82
77 83 #-----------------------------------------------------------------------------
78 84 # Classes and functions
79 85 #-----------------------------------------------------------------------------
80 86
class BlockBreaker(object):
    """Collect lines of Python input and report when they form a block.

    Input is fed in through :meth:`push`, which stores it, tries to compile
    everything received so far and records the outcome in the ``code`` and
    ``is_complete`` attributes. Line-oriented frontends can then poll
    :meth:`interactive_block_ready` to decide whether to execute.
    """
    # Command compiler
    compile = None
    # Number of spaces of indentation
    indent_spaces = 0
    # String, indicating the default input encoding
    encoding = ''
    # String where the current full source input is stored, properly encoded
    source = ''
    # Code object corresponding to the current source
    code = None
    # Boolean indicating whether the current block is complete
    is_complete = None
    # Input mode
    input_mode = 'append'

    # Private attributes

    # List of the newline-terminated chunks pushed since the last reset
    _buffer = None

    def __init__(self, input_mode=None):
        """Create a new BlockBreaker instance.

        Parameters
        ----------
        input_mode : str

          One of 'append', 'replace', default is 'append'.  This controls how
          new inputs are used: in 'append' mode, they are appended to the
          existing buffer and the whole buffer is compiled; in 'replace' mode,
          each new input completely replaces all prior inputs.  Replace mode is
          thus equivalent to prepending a full reset() to every push() call.

          In practice, line-oriented clients likely want to use 'append' mode
          while block-oriented ones will want to use 'replace'.
        """
        self._buffer = []
        self.compile = codeop.CommandCompiler()
        self.encoding = get_input_encoding()
        if input_mode is None:
            # Fall back to the class-level default.
            input_mode = BlockBreaker.input_mode
        self.input_mode = input_mode

    def reset(self):
        """Reset the input buffer and associated state."""
        self.indent_spaces = 0
        self._buffer[:] = []
        self.source = ''
        self.code = None
        # Completeness describes the buffered source; with the buffer emptied
        # a stale True would make interactive_block_ready() report an empty
        # block as ready for execution.
        self.is_complete = None

    def source_reset(self):
        """Return the input source and perform a full reset.
        """
        out = self.source
        self.reset()
        return out

    def push(self, lines):
        """Push one or more lines of input.

        This stores the given lines and returns a status code indicating
        whether the code forms a complete Python block or not.

        Exceptions raised by the compiler for invalid input (SyntaxError,
        OverflowError, ValueError, TypeError, MemoryError) are caught and
        reported as a complete block; any other exception is allowed to
        propagate.

        Parameters
        ----------
        lines : string
          One or more lines of Python input.

        Returns
        -------
        is_complete : boolean
          True if the current input source (the result of the current input
          plus prior inputs) forms a complete Python execution block.  Note that
          this value is also stored as an attribute so it can be queried at any
          time.
        """
        if self.input_mode == 'replace':
            self.reset()

        # If the source code has leading blanks, add 'if 1:\n' to it
        # this allows execution of indented pasted code. It is tempting
        # to add '\n' at the end of source to run commands like ' a=1'
        # directly, but this fails for more complicated scenarios
        if not self._buffer and lines[:1] in [' ', '\t']:
            lines = 'if 1:\n%s' % lines

        self._store(lines)
        source = self.source

        # Before calling compile(), reset the code object to None so that if an
        # exception is raised in compilation, we don't mislead by having
        # inconsistent code/source attributes.
        self.code, self.is_complete = None, None
        try:
            self.code = self.compile(source)
        # Invalid syntax can produce any of a number of different errors from
        # inside the compiler, so we have to catch them all.  Syntax errors
        # immediately produce a 'ready' block, so the invalid Python can be
        # sent to the kernel for evaluation with possible ipython
        # special-syntax conversion.
        except (SyntaxError, OverflowError, ValueError, TypeError, MemoryError):
            self.is_complete = True
        else:
            # Compilation didn't produce any exceptions (though it may not have
            # given a complete code object)
            self.is_complete = self.code is not None
            self._update_indent(lines)

        return self.is_complete

    def interactive_block_ready(self):
        """Return whether a block of interactive input is ready for execution.

        This method is meant to be used by line-oriented frontends, who need to
        guess whether a block is complete or not based solely on prior and
        current input lines.  The BlockBreaker considers it has a complete
        interactive block when *all* of the following are true:

        1. The input compiles to a complete statement.

        2. The indentation level is flush-left (because if we are indented,
           like inside a function definition or for loop, we need to keep
           reading new input).

        3. There is one extra line consisting only of whitespace.

        Because of condition #3, this method should be used only by
        *line-oriented* frontends, since it means that intermediate blank lines
        are not allowed in function definitions (or any other indented block).

        Block-oriented frontends that have a separate keyboard event to
        indicate execution should use the :meth:`split_blocks` method instead.
        """
        if not self.is_complete:
            return False
        if self.indent_spaces == 0:
            return True
        # Still indented: only a trailing blank line ends the block.
        last_line = self.source.splitlines()[-1]
        return not last_line or last_line.isspace()

    def split_blocks(self, lines):
        """Split a multiline string into multiple input blocks"""
        raise NotImplementedError

    #------------------------------------------------------------------------
    # Private interface
    #------------------------------------------------------------------------

    def _update_indent(self, lines):
        """Keep track of the indent level.

        Parameters
        ----------
        lines : string
          The input that was just pushed; comments are removed before the
          lines are inspected and blank lines are skipped.
        """
        for line in remove_comments(lines).splitlines():
            if not line or line.isspace():
                continue

            if self.code is not None:
                # A complete block may have dedented: never track more
                # indentation than the line actually carries.
                inisp = num_ini_spaces(line)
                if inisp < self.indent_spaces:
                    self.indent_spaces = inisp

            # Ignore trailing blanks when looking for the colon: removing a
            # comment such as 'if x:  # note' leaves whitespace behind it.
            if line.rstrip().endswith(':'):
                self.indent_spaces += 4
            elif dedent_re.match(line):
                # Several dedenting keywords in a row must not drive the
                # level below flush-left.
                self.indent_spaces = max(self.indent_spaces - 4, 0)

    def _store(self, lines):
        """Store one or more lines of input.

        If input lines are not newline-terminated, a newline is automatically
        appended."""

        if lines.endswith('\n'):
            self._buffer.append(lines)
        else:
            self._buffer.append(lines+'\n')

        source = ''.join(self._buffer)
        # Only text that is not already a native ``str`` (i.e. Python 2
        # ``unicode``) needs encoding.  Encoding a Python 2 byte string would
        # first decode it implicitly as ASCII and fail on non-ASCII input,
        # while encoding a Python 3 ``str`` would hand bytes to the compiler,
        # which it cannot process.
        if not isinstance(source, str):
            source = source.encode(self.encoding)
        self.source = source
General Comments 0
You need to be logged in to leave comments. Login now