upstream/ipython Commit - r2674:ee5ae0cb

1

"""Analysis of text input into executable blocks.

1

"""Analysis of text input into executable blocks.

2

3

The main class in this module, :class:`InputSplitter`, is designed to break

3

The main class in this module, :class:`InputSplitter`, is designed to break

4

input from either interactive, line-by-line environments or block-based ones,

4

input from either interactive, line-by-line environments or block-based ones,

5

into standalone blocks that can be executed by Python as 'single' statements

5

into standalone blocks that can be executed by Python as 'single' statements

6

(thus triggering sys.displayhook).

6

(thus triggering sys.displayhook).

7

8

For more details, see the class docstring below.

8

For more details, see the class docstring below.

9

"""

9

"""

10

#-----------------------------------------------------------------------------

10

#-----------------------------------------------------------------------------

11

12

#

12

#

13

# Distributed under the terms of the BSD License. The full license is in

13

# Distributed under the terms of the BSD License. The full license is in

14

# the file COPYING, distributed as part of this software.

14

# the file COPYING, distributed as part of this software.

15

#-----------------------------------------------------------------------------

15

#-----------------------------------------------------------------------------

16

17

#-----------------------------------------------------------------------------

17

#-----------------------------------------------------------------------------

18

# Imports

18

# Imports

19

#-----------------------------------------------------------------------------

19

#-----------------------------------------------------------------------------

20

# stdlib

20

# stdlib

21

import codeop

21

import codeop

22

import re

22

import re

23

import sys

23

import sys

24

25

#-----------------------------------------------------------------------------

25

#-----------------------------------------------------------------------------

26

# Utilities

26

# Utilities

27

#-----------------------------------------------------------------------------

27

#-----------------------------------------------------------------------------

28

29

# FIXME: move these utilities to the general ward...

29

# FIXME: move these utilities to the general ward...

30

31

# compiled regexps for autoindent management

31

# compiled regexps for autoindent management

32

dedent_re = re.compile(r'^\s+raise|^\s+return|^\s+pass')

32

dedent_re = re.compile(r'^\s+raise|^\s+return|^\s+pass')

33

ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')

33

ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')

34

35

36

def num_ini_spaces(s):

36

def num_ini_spaces(s):

37

"""Return the number of initial spaces in a string.

37

"""Return the number of initial spaces in a string.

38

39

Note that tabs are counted as a single space. For now, we do *not* support

39

Note that tabs are counted as a single space. For now, we do *not* support

40

mixing of tabs and spaces in the user's input.

40

mixing of tabs and spaces in the user's input.

41

42

Parameters

42

Parameters

43

----------

43

----------

44

s : string

44

s : string

45

46

Returns

46

Returns

47

-------

47

-------

48

n : int

48

n : int

49

"""

49

"""

50

51

ini_spaces = ini_spaces_re.match(s)

51

ini_spaces = ini_spaces_re.match(s)

52

if ini_spaces:

52

if ini_spaces:

53

return ini_spaces.end()

53

return ini_spaces.end()

54

else:

54

else:

55

return 0

55

return 0

56

57

58

def remove_comments(src):

58

def remove_comments(src):

59

"""Remove all comments from input source.

59

"""Remove all comments from input source.

60

61

Note: string literals are NOT recognized, so a '#' inside a string is treated as the start of a comment and removed!

61

Note: string literals are NOT recognized, so a '#' inside a string is treated as the start of a comment and removed!

62

63

Parameters

63

Parameters

64

----------

64

----------

65

src : string

65

src : string

66

A single or multiline input string.

66

A single or multiline input string.

67

68

Returns

68

Returns

69

-------

69

-------

70

String with all Python comments removed.

70

String with all Python comments removed.

71

"""

71

"""

72

73

return re.sub('#.*', '', src)

73

return re.sub('#.*', '', src)

74

75

76

def get_input_encoding():

76

def get_input_encoding():

77

"""Return the default standard input encoding."""

77

"""Return the default standard input encoding."""

78

return getattr(sys.stdin, 'encoding', 'ascii')

78

# There are strange environments for which sys.stdin.encoding is None. We

79

# ensure that a valid encoding is returned.

80

encoding = getattr(sys.stdin, 'encoding', None)

81

if encoding is None:

82

encoding = 'ascii'

83

return encoding

79

84

80

#-----------------------------------------------------------------------------

85

#-----------------------------------------------------------------------------

81

# Classes and functions

86

# Classes and functions

82

#-----------------------------------------------------------------------------

87

#-----------------------------------------------------------------------------

83

88

84

class InputSplitter(object):

89

class InputSplitter(object):

85

"""An object that can split Python source input in executable blocks.

90

"""An object that can split Python source input in executable blocks.

86

91

87

This object is designed to be used in one of two basic modes:

92

This object is designed to be used in one of two basic modes:

88

93

89

1. By feeding it python source line-by-line, using :meth:`push`. In this

94

1. By feeding it python source line-by-line, using :meth:`push`. In this

90

mode, it will return on each push whether the currently pushed code

95

mode, it will return on each push whether the currently pushed code

91

could be executed already. In addition, it provides a method called

96

could be executed already. In addition, it provides a method called

92

:meth:`push_accepts_more` that can be used to query whether more input

97

:meth:`push_accepts_more` that can be used to query whether more input

93

can be pushed into a single interactive block.

98

can be pushed into a single interactive block.

94

99

95

2. By calling :meth:`split_blocks` with a single, multiline Python string,

100

2. By calling :meth:`split_blocks` with a single, multiline Python string,

96

that is then split into blocks each of which can be executed

101

that is then split into blocks each of which can be executed

97

interactively as a single statement.

102

interactively as a single statement.

98

103

99

This is a simple example of how an interactive terminal-based client can use

104

This is a simple example of how an interactive terminal-based client can use

100

this tool::

105

this tool::

101

106

102

isp = InputSplitter()

107

isp = InputSplitter()

103

while isp.push_accepts_more():

108

while isp.push_accepts_more():

104

indent = ' '*isp.indent_spaces

109

indent = ' '*isp.indent_spaces

105

prompt = '>>> ' + indent

110

prompt = '>>> ' + indent

106

line = indent + raw_input(prompt)

111

line = indent + raw_input(prompt)

107

isp.push(line)

112

isp.push(line)

108

print 'Input source was:\n', isp.source_reset(),

113

print 'Input source was:\n', isp.source_reset(),

109

"""

114

"""

110

# Number of spaces of indentation computed from input that has been pushed

115

# Number of spaces of indentation computed from input that has been pushed

111

# so far. This is the attribute callers should query to get the current

116

# so far. This is the attribute callers should query to get the current

112

# indentation level, in order to provide auto-indent facilities.

117

# indentation level, in order to provide auto-indent facilities.

113

indent_spaces = 0

118

indent_spaces = 0

114

# String, indicating the default input encoding. It is computed by default

119

# String, indicating the default input encoding. It is computed by default

115

# at initialization time via get_input_encoding(), but it can be reset by a

120

# at initialization time via get_input_encoding(), but it can be reset by a

116

# client with specific knowledge of the encoding.

121

# client with specific knowledge of the encoding.

117

encoding = ''

122

encoding = ''

118

# String where the current full source input is stored, properly encoded.

123

# String where the current full source input is stored, properly encoded.

119

# Reading this attribute is the normal way of querying the currently pushed

124

# Reading this attribute is the normal way of querying the currently pushed

120

# source code, that has been properly encoded.

125

# source code, that has been properly encoded.

121

source = ''

126

source = ''

122

# Code object corresponding to the current source. It is automatically

127

# Code object corresponding to the current source. It is automatically

123

# synced to the source, so it can be queried at any time to obtain the code

128

# synced to the source, so it can be queried at any time to obtain the code

124

# object; it will be None if the source doesn't compile to valid Python.

129

# object; it will be None if the source doesn't compile to valid Python.

125

code = None

130

code = None

126

# Input mode

131

# Input mode

127

input_mode = 'append'

132

input_mode = 'append'

128

133

129

# Private attributes

134

# Private attributes

130

135

131

# List with lines of input accumulated so far

136

# List with lines of input accumulated so far

132

_buffer = None

137

_buffer = None

133

# Command compiler

138

# Command compiler

134

_compile = None

139

_compile = None

135

# Mark when input has changed indentation all the way back to flush-left

140

# Mark when input has changed indentation all the way back to flush-left

136

_full_dedent = False

141

_full_dedent = False

137

# Boolean indicating whether the current block is complete

142

# Boolean indicating whether the current block is complete

138

_is_complete = None

143

_is_complete = None

139

144

140

def __init__(self, input_mode=None):

145

def __init__(self, input_mode=None):

141

"""Create a new InputSplitter instance.

146

"""Create a new InputSplitter instance.

142

147

143

Parameters

148

Parameters

144

----------

149

----------

145

input_mode : str

150

input_mode : str

146

151

147

One of 'append', 'replace', default is 'append'. This controls how

152

One of 'append', 'replace', default is 'append'. This controls how

148

new inputs are used: in 'append' mode, they are appended to the

153

new inputs are used: in 'append' mode, they are appended to the

149

existing buffer and the whole buffer is compiled; in 'replace' mode,

154

existing buffer and the whole buffer is compiled; in 'replace' mode,

150

each new input completely replaces all prior inputs. Replace mode is

155

each new input completely replaces all prior inputs. Replace mode is

151

thus equivalent to prepending a full reset() to every push() call.

156

thus equivalent to prepending a full reset() to every push() call.

152

157

153

In practice, line-oriented clients likely want to use 'append' mode

158

In practice, line-oriented clients likely want to use 'append' mode

154

while block-oriented ones will want to use 'replace'.

159

while block-oriented ones will want to use 'replace'.

155

"""

160

"""

156

self._buffer = []

161

self._buffer = []

157

self._compile = codeop.CommandCompiler()

162

self._compile = codeop.CommandCompiler()

158

self.encoding = get_input_encoding()

163

self.encoding = get_input_encoding()

159

self.input_mode = InputSplitter.input_mode if input_mode is None \

164

self.input_mode = InputSplitter.input_mode if input_mode is None \

160

else input_mode

165

else input_mode

161

166

162

def reset(self):

167

def reset(self):

163

"""Reset the input buffer and associated state."""

168

"""Reset the input buffer and associated state."""

164

self.indent_spaces = 0

169

self.indent_spaces = 0

165

self._buffer[:] = []

170

self._buffer[:] = []

166

self.source = ''

171

self.source = ''

167

self.code = None

172

self.code = None

168

self._is_complete = False

173

self._is_complete = False

169

self._full_dedent = False

174

self._full_dedent = False

170

175

171

def source_reset(self):

176

def source_reset(self):

172

"""Return the input source and perform a full reset.

177

"""Return the input source and perform a full reset.

173

"""

178

"""

174

out = self.source

179

out = self.source

175

self.reset()

180

self.reset()

176

return out

181

return out

177

182

178

def push(self, lines):

183

def push(self, lines):

179

"""Push one or more lines of input.

184

"""Push one or more lines of input.

180

185

181

This stores the given lines and returns a status code indicating

186

This stores the given lines and returns a status code indicating

182

whether the code forms a complete Python block or not.

187

whether the code forms a complete Python block or not.

183

188

184

Any exceptions generated in compilation are swallowed, but if an

189

Any exceptions generated in compilation are swallowed, but if an

185

exception was produced, the method returns True.

190

exception was produced, the method returns True.

186

191

187

Parameters

192

Parameters

188

----------

193

----------

189

lines : string

194

lines : string

190

One or more lines of Python input.

195

One or more lines of Python input.

191

196

192

Returns

197

Returns

193

-------

198

-------

194

is_complete : boolean

199

is_complete : boolean

195

True if the current input source (the result of the current input

200

True if the current input source (the result of the current input

196

plus prior inputs) forms a complete Python execution block. Note that

201

plus prior inputs) forms a complete Python execution block. Note that

197

this value is also stored as a private attribute (_is_complete), so it

202

this value is also stored as a private attribute (_is_complete), so it

198

can be queried at any time.

203

can be queried at any time.

199

"""

204

"""

200

if self.input_mode == 'replace':

205

if self.input_mode == 'replace':

201

self.reset()

206

self.reset()

202

207

203

# If the source code has leading blanks, add 'if 1:\n' to it

208

# If the source code has leading blanks, add 'if 1:\n' to it

204

# this allows execution of indented pasted code. It is tempting

209

# this allows execution of indented pasted code. It is tempting

205

# to add '\n' at the end of source to run commands like ' a=1'

210

# to add '\n' at the end of source to run commands like ' a=1'

206

# directly, but this fails for more complicated scenarios

211

# directly, but this fails for more complicated scenarios

207

if not self._buffer and lines[:1] in [' ', '\t']:

212

if not self._buffer and lines[:1] in [' ', '\t']:

208

lines = 'if 1:\n%s' % lines

213

lines = 'if 1:\n%s' % lines

209

214

210

self._store(lines)

215

self._store(lines)

211

source = self.source

216

source = self.source

212

217

213

# Before calling _compile(), reset the code object to None so that if an

218

# Before calling _compile(), reset the code object to None so that if an

214

# exception is raised in compilation, we don't mislead by having

219

# exception is raised in compilation, we don't mislead by having

215

# inconsistent code/source attributes.

220

# inconsistent code/source attributes.

216

self.code, self._is_complete = None, None

221

self.code, self._is_complete = None, None

217

222

218

self._update_indent(lines)

223

self._update_indent(lines)

219

try:

224

try:

220

self.code = self._compile(source)

225

self.code = self._compile(source)

221

# Invalid syntax can produce any of a number of different errors from

226

# Invalid syntax can produce any of a number of different errors from

222

# inside the compiler, so we have to catch them all. Syntax errors

227

# inside the compiler, so we have to catch them all. Syntax errors

223

# immediately produce a 'ready' block, so the invalid Python can be

228

# immediately produce a 'ready' block, so the invalid Python can be

224

# sent to the kernel for evaluation with possible ipython

229

# sent to the kernel for evaluation with possible ipython

225

# special-syntax conversion.

230

# special-syntax conversion.

226

except (SyntaxError, OverflowError, ValueError, TypeError,

231

except (SyntaxError, OverflowError, ValueError, TypeError,

227

MemoryError):

232

MemoryError):

228

self._is_complete = True

233

self._is_complete = True

229

else:

234

else:

230

# Compilation didn't produce any exceptions (though it may not have

235

# Compilation didn't produce any exceptions (though it may not have

231

# given a complete code object)

236

# given a complete code object)

232

self._is_complete = self.code is not None

237

self._is_complete = self.code is not None

233

238

234

return self._is_complete

239

return self._is_complete

235

240

236

def push_accepts_more(self):

241

def push_accepts_more(self):

237

"""Return whether a block of interactive input can accept more input.

242

"""Return whether a block of interactive input can accept more input.

238

243

239

This method is meant to be used by line-oriented frontends, who need to

244

This method is meant to be used by line-oriented frontends, who need to

240

guess whether a block is complete or not based solely on prior and

245

guess whether a block is complete or not based solely on prior and

241

current input lines. The InputSplitter considers it has a complete

246

current input lines. The InputSplitter considers it has a complete

242

interactive block and will not accept more input only when either a

247

interactive block and will not accept more input only when either a

243

SyntaxError is raised, or *all* of the following are true:

248

SyntaxError is raised, or *all* of the following are true:

244

249

245

1. The input compiles to a complete statement.

250

1. The input compiles to a complete statement.

246

251

247

2. The indentation level is flush-left (because if we are indented,

252

2. The indentation level is flush-left (because if we are indented,

248

like inside a function definition or for loop, we need to keep

253

like inside a function definition or for loop, we need to keep

249

reading new input).

254

reading new input).

250

255

251

3. There is one extra line consisting only of whitespace.

256

3. There is one extra line consisting only of whitespace.

252

257

253

Because of condition #3, this method should be used only by

258

Because of condition #3, this method should be used only by

254

*line-oriented* frontends, since it means that intermediate blank lines

259

*line-oriented* frontends, since it means that intermediate blank lines

255

are not allowed in function definitions (or any other indented block).

260

are not allowed in function definitions (or any other indented block).

256

261

257

Block-oriented frontends that have a separate keyboard event to

262

Block-oriented frontends that have a separate keyboard event to

258

indicate execution should use the :meth:`split_blocks` method instead.

263

indicate execution should use the :meth:`split_blocks` method instead.

259

264

260

If the current input produces a syntax error, this method immediately

265

If the current input produces a syntax error, this method immediately

261

returns False but does *not* raise the syntax error exception, as

266

returns False but does *not* raise the syntax error exception, as

262

typically clients will want to send invalid syntax to an execution

267

typically clients will want to send invalid syntax to an execution

263

backend which might convert the invalid syntax into valid Python via

268

backend which might convert the invalid syntax into valid Python via

264

one of the dynamic IPython mechanisms.

269

one of the dynamic IPython mechanisms.

265

"""

270

"""

266

271

267

if not self._is_complete:

272

if not self._is_complete:

268

return True

273

return True

269

274

270

if self.indent_spaces==0:

275

if self.indent_spaces==0:

271

return False

276

return False

272

277

273

last_line = self.source.splitlines()[-1]

278

last_line = self.source.splitlines()[-1]

274

return bool(last_line and not last_line.isspace())

279

return bool(last_line and not last_line.isspace())

275

280

276

def split_blocks(self, lines):

281

def split_blocks(self, lines):

277

"""Split a multiline string into multiple input blocks.

282

"""Split a multiline string into multiple input blocks.

278

283

279

Note: this method starts by performing a full reset().

284

Note: this method starts by performing a full reset().

280

285

281

Parameters

286

Parameters

282

----------

287

----------

283

lines : str

288

lines : str

284

A possibly multiline string.

289

A possibly multiline string.

285

290

286

Returns

291

Returns

287

-------

292

-------

288

blocks : list

293

blocks : list

289

A list of strings, each possibly multiline. Each string corresponds

294

A list of strings, each possibly multiline. Each string corresponds

290

to a single block that can be compiled in 'single' mode (unless it

295

to a single block that can be compiled in 'single' mode (unless it

291

has a syntax error)."""

296

has a syntax error)."""

292

297

293

# This code is fairly delicate. If you make any changes here, make

298

# This code is fairly delicate. If you make any changes here, make

294

# absolutely sure that you do run the full test suite and ALL tests

299

# absolutely sure that you do run the full test suite and ALL tests

295

# pass.

300

# pass.

296

301

297

self.reset()

302

self.reset()

298

blocks = []

303

blocks = []

299

304

300

# Reversed copy so we can use pop() efficiently and consume the input

305

# Reversed copy so we can use pop() efficiently and consume the input

301

# as a stack

306

# as a stack

302

lines = lines.splitlines()[::-1]

307

lines = lines.splitlines()[::-1]

303

# Outer loop over all input

308

# Outer loop over all input

304

while lines:

309

while lines:

305

# Inner loop to build each block

310

# Inner loop to build each block

306

while True:

311

while True:

307

# Safety exit from inner loop

312

# Safety exit from inner loop

308

if not lines:

313

if not lines:

309

break

314

break

310

# Grab next line but don't push it yet

315

# Grab next line but don't push it yet

311

next_line = lines.pop()

316

next_line = lines.pop()

312

# Blank/empty lines are pushed as-is

317

# Blank/empty lines are pushed as-is

313

if not next_line or next_line.isspace():

318

if not next_line or next_line.isspace():

314

self.push(next_line)

319

self.push(next_line)

315

continue

320

continue

316

321

317

# Check indentation changes caused by the *next* line

322

# Check indentation changes caused by the *next* line

318

indent_spaces, _full_dedent = self._find_indent(next_line)

323

indent_spaces, _full_dedent = self._find_indent(next_line)

319

324

320

# If the next line causes a dedent, it can be for two different

325

# If the next line causes a dedent, it can be for two different

321

# reasons: either an explicit de-dent by the user or a

326

# reasons: either an explicit de-dent by the user or a

322

# return/raise/pass statement. These MUST be handled

327

# return/raise/pass statement. These MUST be handled

323

# separately:

328

# separately:

324

#

329

#

325

# 1. the first case is only detected when the actual explicit

330

# 1. the first case is only detected when the actual explicit

326

# dedent happens, and that would be the *first* line of a *new*

331

# dedent happens, and that would be the *first* line of a *new*

327

# block. Thus, we must put the line back into the input buffer

332

# block. Thus, we must put the line back into the input buffer

328

# so that it starts a new block on the next pass.

333

# so that it starts a new block on the next pass.

329

#

334

#

330

# 2. the second case is detected in the line before the actual

335

# 2. the second case is detected in the line before the actual

331

# dedent happens, so we consume the line and we can break out

336

# dedent happens, so we consume the line and we can break out

332

# to start a new block.

337

# to start a new block.

333

338

334

# Case 1, explicit dedent causes a break

339

# Case 1, explicit dedent causes a break

335

if _full_dedent and not next_line.startswith(' '):

340

if _full_dedent and not next_line.startswith(' '):

336

lines.append(next_line)

341

lines.append(next_line)

337

break

342

break

338

343

339

# Otherwise any line is pushed

344

# Otherwise any line is pushed

340

self.push(next_line)

345

self.push(next_line)

341

346

342

# Case 2, full dedent with full block ready:

347

# Case 2, full dedent with full block ready:

343

if _full_dedent or \

348

if _full_dedent or \

344

self.indent_spaces==0 and not self.push_accepts_more():

349

self.indent_spaces==0 and not self.push_accepts_more():

345

break

350

break

346

# Form the new block with the current source input

351

# Form the new block with the current source input

347

blocks.append(self.source_reset())

352

blocks.append(self.source_reset())

348

353

349

return blocks

354

return blocks

350

355

351

#------------------------------------------------------------------------

356

#------------------------------------------------------------------------

352

# Private interface

357

# Private interface

353

#------------------------------------------------------------------------

358

#------------------------------------------------------------------------

354

359

355

def _find_indent(self, line):

360

def _find_indent(self, line):

356

"""Compute the new indentation level for a single line.

361

"""Compute the new indentation level for a single line.

357

362

358

Parameters

363

Parameters

359

----------

364

----------

360

line : str

365

line : str

361

A single new line of non-whitespace, non-comment Python input.

366

A single new line of non-whitespace, non-comment Python input.

362

367

363

Returns

368

Returns

364

-------

369

-------

365

indent_spaces : int

370

indent_spaces : int

366

New value for the indent level (it may be equal to self.indent_spaces

371

New value for the indent level (it may be equal to self.indent_spaces

367

if indentation doesn't change).

372

if indentation doesn't change).

368

373

369

full_dedent : boolean

374

full_dedent : boolean

370

Whether the new line causes a full flush-left dedent.

375

Whether the new line causes a full flush-left dedent.

371

"""

376

"""

372

indent_spaces = self.indent_spaces

377

indent_spaces = self.indent_spaces

373

full_dedent = self._full_dedent

378

full_dedent = self._full_dedent

374

379

375

inisp = num_ini_spaces(line)

380

inisp = num_ini_spaces(line)

376

if inisp < indent_spaces:

381

if inisp < indent_spaces:

377

indent_spaces = inisp

382

indent_spaces = inisp

378

if indent_spaces <= 0:

383

if indent_spaces <= 0:

379

#print 'Full dedent in text',self.source # dbg

384

#print 'Full dedent in text',self.source # dbg

380

full_dedent = True

385

full_dedent = True

381

386

382

if line[-1] == ':':

387

if line[-1] == ':':

383

indent_spaces += 4

388

indent_spaces += 4

384

elif dedent_re.match(line):

389

elif dedent_re.match(line):

385

indent_spaces -= 4

390

indent_spaces -= 4

386

if indent_spaces <= 0:

391

if indent_spaces <= 0:

387

full_dedent = True

392

full_dedent = True

388

393

389

# Safety

394

# Safety

390

if indent_spaces < 0:

395

if indent_spaces < 0:

391

indent_spaces = 0

396

indent_spaces = 0

392

#print 'safety' # dbg

397

#print 'safety' # dbg

393

398

394

return indent_spaces, full_dedent

399

return indent_spaces, full_dedent

395

400

396

def _update_indent(self, lines):

401

def _update_indent(self, lines):

397

for line in remove_comments(lines).splitlines():

402

for line in remove_comments(lines).splitlines():

398

if line and not line.isspace():

403

if line and not line.isspace():

399

self.indent_spaces, self._full_dedent = self._find_indent(line)

404

self.indent_spaces, self._full_dedent = self._find_indent(line)

400

405

401

def _store(self, lines):

406

def _store(self, lines):

402

"""Store one or more lines of input.

407

"""Store one or more lines of input.

403

408

404

If input lines are not newline-terminated, a newline is automatically

409

If input lines are not newline-terminated, a newline is automatically

405

appended."""

410

appended."""

406

411

407

if lines.endswith('\n'):

412

if lines.endswith('\n'):

408

self._buffer.append(lines)

413

self._buffer.append(lines)

409

else:

414

else:

410

self._buffer.append(lines+'\n')

415

self._buffer.append(lines+'\n')

411

self._set_source()

416

self._set_source()

412

417

413

def _set_source(self):

418

def _set_source(self):

414

self.source = ''.join(self._buffer).encode(self.encoding)

419

self.source = ''.join(self._buffer).encode(self.encoding)

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             """Analysis of text input into executable blocks.
             The main class in this module, :class:`InputSplitter`, is designed to break
             input from either interactive, line-by-line environments or block-based ones,
             into standalone blocks that can be executed by Python as 'single' statements
             (thus triggering sys.displayhook).
             For more details, see the class docstring below.
             """
             #-----------------------------------------------------------------------------
             #  Copyright (C) 2010  The IPython Development Team
             #
             #  Distributed under the terms of the BSD License.  The full license is in
             #  the file COPYING, distributed as part of this software.
             #-----------------------------------------------------------------------------
             #-----------------------------------------------------------------------------
             # Imports
             #-----------------------------------------------------------------------------
             # stdlib
             import codeop
             import re
             import sys
             #-----------------------------------------------------------------------------
             # Utilities
             #-----------------------------------------------------------------------------
             # FIXME: move these utilities to the general ward...
             # Compiled regexps for autoindent management.
             # dedent_re: an indented line that starts with the raise/return/pass keyword.
             # The trailing \b keeps identifiers that merely begin with one of those words
             # (e.g. 'password', 'returns', 'raised') from being mistaken for a statement
             # that ends the current block.
             dedent_re = re.compile(r'^\s+(?:raise|return|pass)\b')
             # ini_spaces_re: the run of leading whitespace (newlines excluded) of a line.
             ini_spaces_re = re.compile(r'^([ \t\r\f\v]+)')
             def num_ini_spaces(s):
                 """Return how many whitespace characters a string starts with.

                 Each tab counts as a single space; mixing tabs and spaces in the user's
                 input is not supported for now.

                 Parameters
                 ----------
                 s : string

                 Returns
                 -------
                 n : int
                   Length of the leading run matched by ``ini_spaces_re``, or 0 when the
                   string does not start with whitespace.
                 """
                 leading = ini_spaces_re.match(s)
                 return leading.end() if leading else 0
             def remove_comments(src):
                 """Strip Python comments out of a source string.

                 Everything from a ``#`` up to the end of its line is deleted.  String
                 literals are NOT recognized, so a ``#`` inside a string is treated as
                 the start of a comment as well.

                 Parameters
                 ----------
                 src : string
                   A single or multiline input string.

                 Returns
                 -------
                 String with all Python comments removed.
                 """
                 comment_re = re.compile('#.*')
                 return comment_re.sub('', src)
             def get_input_encoding():
                 """Return the default standard input encoding."""
-                return getattr(sys.stdin, 'encoding', 'ascii')
+                # There are strange environments for which sys.stdin.encoding is None. We
+                # ensure that a valid encoding is returned.
+                encoding = getattr(sys.stdin, 'encoding', None)
+                if encoding is None:
+                    encoding = 'ascii'
+                return encoding
             #-----------------------------------------------------------------------------
             # Classes and functions
             #-----------------------------------------------------------------------------
             class InputSplitter(object):
                 """An object that can split Python source input in executable blocks.
                 This object is designed to be used in one of two basic modes:
                 1. By feeding it python source line-by-line, using :meth:`push`.  In this
                    mode, it will return on each push whether the currently pushed code
                    could be executed already.  In addition, it provides a method called
                    :meth:`push_accepts_more` that can be used to query whether more input
                    can be pushed into a single interactive block.
                 2. By calling :meth:`split_blocks` with a single, multiline Python string,
                    that is then split into blocks each of which can be executed
                    interactively as a single statement.
                 This is a simple example of how an interactive terminal-based client can use
                 this tool::
                     isp = InputSplitter()
                     while isp.push_accepts_more():
                         indent = ' '*isp.indent_spaces
                         prompt = '>>> ' + indent
                         line = indent + raw_input(prompt)
                         isp.push(line)
                     print 'Input source was:\n', isp.source_reset(),
                 """
                 # Number of spaces of indentation computed from input that has been pushed
                 # so far.  This is the attributes callers should query to get the current
                 # indentation level, in order to provide auto-indent facilities.
                 indent_spaces = 0
                 # String, indicating the default input encoding.  It is computed by default
                 # at initialization time via get_input_encoding(), but it can be reset by a
                 # client with specific knowledge of the encoding.
                 encoding = ''
                 # String where the current full source input is stored, properly encoded.
                 # Reading this attribute is the normal way of querying the currently pushed
                 # source code, that has been properly encoded.
                 source = ''
                 # Code object corresponding to the current source.  It is automatically
                 # synced to the source, so it can be queried at any time to obtain the code
                 # object; it will be None if the source doesn't compile to valid Python.
                 code = None
                 # Input mode
                 input_mode = 'append'
                 # Private attributes
                 # List with lines of input accumulated so far
                 _buffer = None
                 # Command compiler
                 _compile = None
                 # Mark when input has changed indentation all the way back to flush-left
                 _full_dedent = False
                 # Boolean indicating whether the current block is complete
                 _is_complete = None
                 def __init__(self, input_mode=None):
                     """Set up a fresh InputSplitter.

                     Parameters
                     ----------
                     input_mode : str
                       Either 'append' or 'replace'; when omitted, the class-level default
                       ('append') is used.  In 'append' mode every new input is added to
                       the existing buffer and the whole buffer is compiled; in 'replace'
                       mode each new input discards all prior input, which is the same as
                       calling reset() before every push().  Line-oriented clients will
                       usually want 'append', block-oriented ones 'replace'.
                     """
                     if input_mode is None:
                         # No explicit choice: fall back to the class-level default
                         input_mode = InputSplitter.input_mode
                     self.input_mode = input_mode
                     self.encoding = get_input_encoding()
                     self._compile = codeop.CommandCompiler()
                     self._buffer = []
                 def reset(self):
                     """Reset the input buffer and associated state."""
                     self.indent_spaces = 0
                     self._buffer[:] = []
                     self.source = ''
                     self.code = None
                     self._is_complete = False
                     self._full_dedent = False
                 def source_reset(self):
                     """Return the input source and perform a full reset.
                     """
                     out = self.source
                     self.reset()
                     return out
                 def push(self, lines):
                     """Push one or more lines of input.

                     This stores the given lines and returns a status code indicating
                     whether the code forms a complete Python block or not.

                     Any exceptions generated in compilation are swallowed, but if an
                     exception was produced, the method returns True.

                     Parameters
                     ----------
                     lines : string
                       One or more lines of Python input.

                     Returns
                     -------
                     is_complete : boolean
                       True if the current input source (the result of the current input
                       plus prior inputs) forms a complete Python execution block.  Note that
                       this value is also stored as a private attribute (_is_complete), so it
                       can be queried at any time.
                     """
                     # In 'replace' mode every push starts from a clean slate
                     if self.input_mode == 'replace':
                         self.reset()
                     # If the source code has leading blanks, add 'if 1:\n' to it
                     # this allows execution of indented pasted code. It is tempting
                     # to add '\n' at the end of source to run commands like ' a=1'
                     # directly, but this fails for more complicated scenarios
                     if not self._buffer and lines[:1] in [' ', '\t']:
                         lines = 'if 1:\n%s' % lines
                     # _store() appends to the buffer and refreshes self.source
                     self._store(lines)
                     source = self.source
                     # Before calling _compile(), reset the code object to None so that if an
                     # exception is raised in compilation, we don't mislead by having
                     # inconsistent code/source attributes.
                     self.code, self._is_complete = None, None
                     # Indentation state is updated from the newly pushed lines only
                     self._update_indent(lines)
                     try:
                         self.code = self._compile(source)
                     # Invalid syntax can produce any of a number of different errors from
                     # inside the compiler, so we have to catch them all.  Syntax errors
                     # immediately produce a 'ready' block, so the invalid Python can be
                     # sent to the kernel for evaluation with possible ipython
                     # special-syntax conversion.
                     except (SyntaxError, OverflowError, ValueError, TypeError,
                             MemoryError):
                         self._is_complete = True
                     else:
                         # Compilation didn't produce any exceptions (though it may not have
                         # given a complete code object)
                         self._is_complete = self.code is not None
                     return self._is_complete
                 def push_accepts_more(self):
                     """Return whether a block of interactive input can accept more input.
                     This method is meant to be used by line-oriented frontends, who need to
                     guess whether a block is complete or not based solely on prior and
                     current input lines.  The InputSplitter considers it has a complete
                     interactive block and will not accept more input only when either a
                     SyntaxError is raised, or *all* of the following are true:
 . The input compiles to a complete statement.
 . The indentation level is flush-left (because if we are indented,
                        like inside a function definition or for loop, we need to keep
                        reading new input).
 . There is one extra line consisting only of whitespace.
                     Because of condition #3, this method should be used only by
                     *line-oriented* frontends, since it means that intermediate blank lines
                     are not allowed in function definitions (or any other indented block).
                     Block-oriented frontends that have a separate keyboard event to
                     indicate execution should use the :meth:`split_blocks` method instead.
                     If the current input produces a syntax error, this method immediately
                     returns False but does *not* raise the syntax error exception, as
                     typically clients will want to send invalid syntax to an execution
                     backend which might convert the invalid syntax into valid Python via
                     one of the dynamic IPython mechanisms.
                     """
                     if not self._is_complete:
                         return True
                     if self.indent_spaces==0:
                         return False
                     last_line = self.source.splitlines()[-1]
                     return bool(last_line and not last_line.isspace())
                 def split_blocks(self, lines):
                     """Split a multiline string into multiple input blocks.

                     Note: this method starts by performing a full reset().

                     Parameters
                     ----------
                     lines : str
                       A possibly multiline string.

                     Returns
                     -------
                     blocks : list
                       A list of strings, each possibly multiline.  Each string corresponds
                       to a single block that can be compiled in 'single' mode (unless it
                       has a syntax error)."""
                     # This code is fairly delicate.  If you make any changes here, make
                     # absolutely sure that you do run the full test suite and ALL tests
                     # pass.
                     self.reset()
                     blocks = []
                     # Reversed copy so we can use pop() efficiently and consume the input
                     # as a stack
                     lines = lines.splitlines()[::-1]
                     # Outer loop over all input
                     while lines:
                         # Inner loop to build each block
                         while True:
                             # Safety exit from inner loop
                             if not lines:
                                 break
                             # Grab next line but don't push it yet
                             next_line = lines.pop()
                             # Blank/empty lines are pushed as-is
                             if not next_line or next_line.isspace():
                                 self.push(next_line)
                                 continue
                             # Check indentation changes caused by the *next* line
                             # (only the full-dedent flag of the result is used below)
                             indent_spaces, _full_dedent = self._find_indent(next_line)
                             # If the next line causes a dedent, it can be for two different
                             # reasons: either an explicit de-dent by the user or a
                             # return/raise/pass statement.  These MUST be handled
                             # separately:
                             #
                             # 1. the first case is only detected when the actual explicit
                             # dedent happens, and that would be the *first* line of a *new*
                             # block.  Thus, we must put the line back into the input buffer
                             # so that it starts a new block on the next pass.
                             #
                             # 2. the second case is detected in the line before the actual
                             # dedent happens, so we consume the line and we can break out
                             # to start a new block.
                             # Case 1, explicit dedent causes a break
                             if _full_dedent and not next_line.startswith(' '):
                                 lines.append(next_line)
                                 break
                             # Otherwise any line is pushed
                             self.push(next_line)
                             # Case 2, full dedent with full block ready:
                             if _full_dedent or \
                                    self.indent_spaces==0 and not self.push_accepts_more():
                                 break
                         # Form the new block with the current source input
                         blocks.append(self.source_reset())
                     return blocks
                 #------------------------------------------------------------------------
                 # Private interface
                 #------------------------------------------------------------------------
                 def _find_indent(self, line):
                     """Compute the new indentation level for a single line.

                     The instance state is only read, never modified; callers decide
                     whether to store the result.

                     Parameters
                     ----------
                     line : str
                       A single new line of non-whitespace, non-comment Python input.
                       Trailing whitespace (such as what is left behind once a trailing
                       comment has been stripped) is tolerated.

                     Returns
                     -------
                     indent_spaces : int
                       New value for the indent level (it may be equal to
                       self.indent_spaces if indentation doesn't change).
                     full_dedent : boolean
                       Whether the new line causes a full flush-left dedent.
                     """
                     indent_spaces = self.indent_spaces
                     full_dedent = self._full_dedent
                     inisp = num_ini_spaces(line)
                     # The line starts to the left of the current level: explicit dedent
                     if inisp < indent_spaces:
                         indent_spaces = inisp
                         if indent_spaces <= 0:
                             full_dedent = True
                     # A block opener ends in ':' once trailing blanks are ignored; testing
                     # line[-1] alone missed 'if x:  ' and raised IndexError on ''.
                     if line.rstrip().endswith(':'):
                         indent_spaces += 4
                     elif dedent_re.match(line):
                         # raise/return/pass close the current block
                         indent_spaces -= 4
                         if indent_spaces <= 0:
                             full_dedent = True
                     # Safety: never report a negative indentation
                     if indent_spaces < 0:
                         indent_spaces = 0
                     return indent_spaces, full_dedent
                 def _update_indent(self, lines):
                     """Refresh indent_spaces and _full_dedent from the given input lines.

                     Comments are stripped first; empty and whitespace-only lines are
                     skipped, and every remaining line is fed, in order, to
                     _find_indent().
                     """
                     uncommented = remove_comments(lines)
                     for text in uncommented.splitlines():
                         if not text or text.isspace():
                             continue
                         new_indent, new_dedent = self._find_indent(text)
                         self.indent_spaces = new_indent
                         self._full_dedent = new_dedent
                 def _store(self, lines):
                     """Store one or more lines of input.
                     If input lines are not newline-terminated, a newline is automatically
                     appended."""
                     if lines.endswith('\n'):
                         self._buffer.append(lines)
                     else:
                         self._buffer.append(lines+'\n')
                     self._set_source()
                 def _set_source(self):
                     self.source = ''.join(self._buffer).encode(self.encoding)