#7558: Added a rule for `?`
Lev Abalkin
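The two new token rules tag IPython's `?`/`??` help suffixes as Operator tokens. A minimal sketch of the effect, mirroring the test added below; the import path for the lexers module is an assumption, since the diff does not name the files:

    # Sketch only: adjust the import to wherever lexers.py lives in your
    # IPython checkout (the diff does not name the file).
    from IPython.nbconvert.utils import lexers

    lexer = lexers.IPythonLexer()
    print(list(lexer.get_tokens('%system?\n')))
    # Expected, per the test added in this commit:
    # [(Token.Operator, '%'), (Token.Keyword, 'system'),
    #  (Token.Operator, '?'), (Token.Text, '\n')]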
@@ -1,504 +1,506 @@
 # -*- coding: utf-8 -*-
 """
 Defines a variety of Pygments lexers for highlighting IPython code.
 
 This includes:
 
     IPythonLexer, IPython3Lexer
         Lexers for pure IPython (python + magic/shell commands)
 
     IPythonPartialTracebackLexer, IPythonTracebackLexer
         Supports 2.x and 3.x via keyword `python3`. The partial traceback
         lexer reads everything but the Python code appearing in a traceback.
         The full lexer combines the partial lexer with an IPython lexer.
 
     IPythonConsoleLexer
         A lexer for IPython console sessions, with support for tracebacks.
 
     IPyLexer
         A friendly lexer which examines the first line of text and from it,
         decides whether to use an IPython lexer or an IPython console lexer.
         This is probably the only lexer that needs to be explicitly added
         to Pygments.
 
 """
 #-----------------------------------------------------------------------------
 # Copyright (c) 2013, the IPython Development Team.
 #
 # Distributed under the terms of the Modified BSD License.
 #
 # The full license is in the file COPYING.txt, distributed with this software.
 #-----------------------------------------------------------------------------
 
 # Standard library
 import re
 
 # Third party
 from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
 from pygments.lexer import (
     Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
 )
 from pygments.token import (
     Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
 )
 from pygments.util import get_bool_opt
 
 # Local
 from IPython.testing.skipdoctest import skip_doctest
 
 line_re = re.compile('.*?\n')
 
 ipython_tokens = [
     (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
+    (r"(%%?)(\w+)(\?\??)$", bygroups(Operator, Keyword, Operator)),
+    (r"\b(\?\??)(\s*)$", bygroups(Operator, Text)),
     (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
                                             using(BashLexer), Text)),
     (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
     (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
     (r'(!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
 ]
 
 def build_ipy_lexer(python3):
     """Builds IPython lexers depending on the value of `python3`.
 
     The lexer inherits from an appropriate Python lexer and then adds
     information about IPython-specific keywords (i.e. magic commands,
     shell commands, etc.)
 
     Parameters
     ----------
     python3 : bool
         If `True`, then build an IPython lexer from a Python 3 lexer.
 
     """
     # It would be nice to have a single IPython lexer class which takes
     # a boolean `python3`. But since there are two Python lexer classes,
     # we will also have two IPython lexer classes.
     if python3:
         PyLexer = Python3Lexer
         clsname = 'IPython3Lexer'
         name = 'IPython3'
         aliases = ['ipython3']
         doc = """IPython3 Lexer"""
     else:
         PyLexer = PythonLexer
         clsname = 'IPythonLexer'
         name = 'IPython'
         aliases = ['ipython2', 'ipython']
         doc = """IPython Lexer"""
 
     tokens = PyLexer.tokens.copy()
     tokens['root'] = ipython_tokens + tokens['root']
 
     attrs = {'name': name, 'aliases': aliases,
              '__doc__': doc, 'tokens': tokens}
 
     return type(name, (PyLexer,), attrs)
 
 
 IPython3Lexer = build_ipy_lexer(python3=True)
 IPythonLexer = build_ipy_lexer(python3=False)
 
 
 class IPythonPartialTracebackLexer(RegexLexer):
     """
     Partial lexer for IPython tracebacks.
 
     Handles all the non-python output. This works for both Python 2.x and 3.x.
 
     """
     name = 'IPython Partial Traceback'
 
     tokens = {
         'root': [
             # Tracebacks for syntax errors have a different style.
             # For both types of tracebacks, we mark the first line with
             # Generic.Traceback. For syntax errors, we mark the filename
             # as we mark the filenames for non-syntax tracebacks.
             #
             # These two regexps define how IPythonConsoleLexer finds a
             # traceback.
             #
             ## Non-syntax traceback
             (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
             ## Syntax traceback
             (r'^(  File)(.*)(, line )(\d+\n)',
              bygroups(Generic.Traceback, Name.Namespace,
                       Generic.Traceback, Literal.Number.Integer)),
 
             # (Exception Identifier)(Whitespace)(Traceback Message)
             (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
              bygroups(Name.Exception, Generic.Whitespace, Text)),
             # (Module/Filename)(Text)(Callee)(Function Signature)
             # Better options for callee and function signature?
             (r'(.*)( in )(.*)(\(.*\)\n)',
              bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
             # Regular line: (Whitespace)(Line Number)(Python Code)
             (r'(\s*?)(\d+)(.*?\n)',
              bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
             # Emphasized line: (Arrow)(Line Number)(Python Code)
             # Using Exception token so arrow color matches the Exception.
             (r'(-*>?\s?)(\d+)(.*?\n)',
              bygroups(Name.Exception, Literal.Number.Integer, Other)),
             # (Exception Identifier)(Message)
             (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
              bygroups(Name.Exception, Text)),
             # Tag everything else as Other, will be handled later.
             (r'.*\n', Other),
         ],
     }
 
 
 class IPythonTracebackLexer(DelegatingLexer):
     """
     IPython traceback lexer.
 
     For doctests, the tracebacks can be snipped as much as desired with the
     exception of the lines that designate a traceback. For non-syntax error
     tracebacks, this is the line of hyphens. For syntax error tracebacks,
     this is the line which lists the File and line number.
 
     """
     # The lexer inherits from DelegatingLexer. The "root" lexer is an
     # appropriate IPython lexer, which depends on the value of the boolean
     # `python3`. First, we parse with the partial IPython traceback lexer.
     # Then, any code marked with the "Other" token is delegated to the root
     # lexer.
     #
     name = 'IPython Traceback'
     aliases = ['ipythontb']
 
     def __init__(self, **options):
         self.python3 = get_bool_opt(options, 'python3', False)
         if self.python3:
             self.aliases = ['ipython3tb']
         else:
             self.aliases = ['ipython2tb', 'ipythontb']
 
         if self.python3:
             IPyLexer = IPython3Lexer
         else:
             IPyLexer = IPythonLexer
 
         DelegatingLexer.__init__(self, IPyLexer,
                                  IPythonPartialTracebackLexer, **options)
 
 @skip_doctest
 class IPythonConsoleLexer(Lexer):
     """
     An IPython console lexer for IPython code-blocks and doctests, such as:
 
     .. code-block:: rst
 
         .. code-block:: ipythonconsole
 
             In [1]: a = 'foo'
 
             In [2]: a
             Out[2]: 'foo'
 
             In [3]: print a
             foo
 
             In [4]: 1 / 0
 
 
     Support is also provided for IPython exceptions:
 
     .. code-block:: rst
 
         .. code-block:: ipythonconsole
 
             In [1]: raise Exception
 
             ---------------------------------------------------------------------------
             Exception                                 Traceback (most recent call last)
             <ipython-input-1-fca2ab0ca76b> in <module>()
             ----> 1 raise Exception
 
             Exception:
 
     """
     name = 'IPython console session'
     aliases = ['ipythonconsole']
     mimetypes = ['text/x-ipython-console']
 
     # The regexps used to determine what is input and what is output.
     # The default prompts for IPython are:
     #
     #     c.PromptManager.in_template  = 'In [\#]: '
     #     c.PromptManager.in2_template = '   .\D.: '
     #     c.PromptManager.out_template = 'Out[\#]: '
     #
     in1_regex = r'In \[[0-9]+\]: '
     in2_regex = r'   \.\.+\.: '
     out_regex = r'Out\[[0-9]+\]: '
 
     #: The regex to determine when a traceback starts.
     ipytb_start = re.compile(r'^(\^C)?(-+\n)|^(  File)(.*)(, line )(\d+\n)')
 
     def __init__(self, **options):
         """Initialize the IPython console lexer.
 
         Parameters
         ----------
         python3 : bool
             If `True`, then the console inputs are parsed using a Python 3
             lexer. Otherwise, they are parsed using a Python 2 lexer.
         in1_regex : RegexObject
             The compiled regular expression used to detect the start
             of inputs. Although the IPython configuration setting may have a
             trailing whitespace, do not include it in the regex. If `None`,
             then the default input prompt is assumed.
         in2_regex : RegexObject
             The compiled regular expression used to detect the continuation
             of inputs. Although the IPython configuration setting may have a
             trailing whitespace, do not include it in the regex. If `None`,
             then the default input prompt is assumed.
         out_regex : RegexObject
             The compiled regular expression used to detect outputs. If `None`,
             then the default output prompt is assumed.
 
         """
         self.python3 = get_bool_opt(options, 'python3', False)
         if self.python3:
             self.aliases = ['ipython3console']
         else:
             self.aliases = ['ipython2console', 'ipythonconsole']
 
         in1_regex = options.get('in1_regex', self.in1_regex)
         in2_regex = options.get('in2_regex', self.in2_regex)
         out_regex = options.get('out_regex', self.out_regex)
 
         # So that we can work with input and output prompts which have been
         # rstrip'd (possibly by editors) we also need rstrip'd variants. If
         # we do not do this, then such prompts will be tagged as 'output'.
         # The reason we can't just use the rstrip'd variants instead is that
         # we want any whitespace associated with the prompt to be inserted
         # with the token. This allows formatted code to be modified so as to
         # hide the appearance of prompts, with the whitespace included. One
         # example use of this is in copybutton.js from the standard lib
         # Python docs.
         in1_regex_rstrip = in1_regex.rstrip() + '\n'
         in2_regex_rstrip = in2_regex.rstrip() + '\n'
         out_regex_rstrip = out_regex.rstrip() + '\n'
 
         # Compile and save them all.
         attrs = ['in1_regex', 'in2_regex', 'out_regex',
                  'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
         for attr in attrs:
             self.__setattr__(attr, re.compile(locals()[attr]))
 
         Lexer.__init__(self, **options)
 
         if self.python3:
             pylexer = IPython3Lexer
             tblexer = IPythonTracebackLexer
         else:
             pylexer = IPythonLexer
             tblexer = IPythonTracebackLexer
 
         self.pylexer = pylexer(**options)
         self.tblexer = tblexer(**options)
 
         self.reset()
 
     def reset(self):
         self.mode = 'output'
         self.index = 0
         self.buffer = u''
         self.insertions = []
 
     def buffered_tokens(self):
         """
         Generator of unprocessed tokens after doing insertions and before
         changing to a new state.
 
         """
         if self.mode == 'output':
             tokens = [(0, Generic.Output, self.buffer)]
         elif self.mode == 'input':
             tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
         else: # traceback
             tokens = self.tblexer.get_tokens_unprocessed(self.buffer)
 
         for i, t, v in do_insertions(self.insertions, tokens):
             # All token indexes are relative to the buffer.
             yield self.index + i, t, v
 
         # Clear it all
         self.index += len(self.buffer)
         self.buffer = u''
         self.insertions = []
 
     def get_mci(self, line):
         """
         Parses the line and returns a 3-tuple: (mode, code, insertion).
 
         `mode` is the next mode (or state) of the lexer, and is always equal
         to 'input', 'output', or 'tb'.
 
         `code` is a portion of the line that should be added to the buffer
         corresponding to the next mode and eventually lexed by another lexer.
         For example, `code` could be Python code if `mode` were 'input'.
 
         `insertion` is a 3-tuple (index, token, text) representing an
         unprocessed "token" that will be inserted into the stream of tokens
         that are created from the buffer once we change modes. This is usually
         the input or output prompt.
 
         In general, the next mode depends on the current mode and on the
         contents of `line`.
 
         """
         # To reduce the number of regex match checks, we have multiple
         # 'if' blocks instead of 'if-elif' blocks.
 
         # Check for possible end of input
         in2_match = self.in2_regex.match(line)
         in2_match_rstrip = self.in2_regex_rstrip.match(line)
         if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
            in2_match_rstrip:
             end_input = True
         else:
             end_input = False
         if end_input and self.mode != 'tb':
             # Only look for an end of input when not in tb mode.
             # An ellipsis could appear within the traceback.
             mode = 'output'
             code = u''
             insertion = (0, Generic.Prompt, line)
             return mode, code, insertion
 
         # Check for output prompt
         out_match = self.out_regex.match(line)
         out_match_rstrip = self.out_regex_rstrip.match(line)
         if out_match or out_match_rstrip:
             mode = 'output'
             if out_match:
                 idx = out_match.end()
             else:
                 idx = out_match_rstrip.end()
             code = line[idx:]
             # Use the 'heading' token for output. We cannot use Generic.Error
             # since it would conflict with exceptions.
             insertion = (0, Generic.Heading, line[:idx])
             return mode, code, insertion
 
 
         # Check for input or continuation prompt (non stripped version)
         in1_match = self.in1_regex.match(line)
         if in1_match or (in2_match and self.mode != 'tb'):
             # New input or when not in tb, continued input.
             # We do not check for continued input when in tb since it is
             # allowable to replace a long stack with an ellipsis.
             mode = 'input'
             if in1_match:
                 idx = in1_match.end()
             else: # in2_match
                 idx = in2_match.end()
             code = line[idx:]
             insertion = (0, Generic.Prompt, line[:idx])
             return mode, code, insertion
 
         # Check for input or continuation prompt (stripped version)
         in1_match_rstrip = self.in1_regex_rstrip.match(line)
         if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
             # New input or when not in tb, continued input.
             # We do not check for continued input when in tb since it is
             # allowable to replace a long stack with an ellipsis.
             mode = 'input'
             if in1_match_rstrip:
                 idx = in1_match_rstrip.end()
             else: # in2_match
                 idx = in2_match_rstrip.end()
             code = line[idx:]
             insertion = (0, Generic.Prompt, line[:idx])
             return mode, code, insertion
 
         # Check for traceback
         if self.ipytb_start.match(line):
             mode = 'tb'
             code = line
             insertion = None
             return mode, code, insertion
 
         # All other stuff...
         if self.mode in ('input', 'output'):
             # We assume all other text is output. Multiline input that
             # does not use the continuation marker cannot be detected.
             # For example, the 3 in the following is clearly output:
             #
             #    In [1]: print 3
             #    3
             #
             # But the following second line is part of the input:
             #
             #    In [2]: while True:
             #        print True
             #
             # In both cases, the 2nd line will be 'output'.
             #
             mode = 'output'
         else:
             mode = 'tb'
 
         code = line
         insertion = None
 
         return mode, code, insertion
 
     def get_tokens_unprocessed(self, text):
         self.reset()
         for match in line_re.finditer(text):
             line = match.group()
             mode, code, insertion = self.get_mci(line)
 
             if mode != self.mode:
                 # Yield buffered tokens before transitioning to new mode.
                 for token in self.buffered_tokens():
                     yield token
                 self.mode = mode
 
             if insertion:
                 self.insertions.append((len(self.buffer), [insertion]))
             self.buffer += code
         else:
             for token in self.buffered_tokens():
                 yield token
 
 class IPyLexer(Lexer):
     """
     Primary lexer for all IPython-like code.
 
     This is a simple helper lexer. If the first line of the text begins with
     "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
     lexer. If not, then the entire text is parsed with an IPython lexer.
 
     The goal is to reduce the number of lexers that are registered
     with Pygments.
 
     """
     name = 'IPy session'
     aliases = ['ipy']
 
     def __init__(self, **options):
         self.python3 = get_bool_opt(options, 'python3', False)
         if self.python3:
             self.aliases = ['ipy3']
         else:
             self.aliases = ['ipy2', 'ipy']
 
         Lexer.__init__(self, **options)
 
         self.IPythonLexer = IPythonLexer(**options)
         self.IPythonConsoleLexer = IPythonConsoleLexer(**options)
 
     def get_tokens_unprocessed(self, text):
         # Search for the input prompt anywhere...this allows code blocks to
         # begin with comments as well.
         if re.match(r'.*(In \[[0-9]+\]:)', text.strip(), re.DOTALL):
             lex = self.IPythonConsoleLexer
         else:
             lex = self.IPythonLexer
         for token in lex.get_tokens_unprocessed(text):
             yield token
 
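As the module docstring notes, IPyLexer is probably the only lexer that needs to be registered with Pygments explicitly; for one-off highlighting it can also be driven directly through the standard Pygments API. A sketch, with the lexer import path assumed as above:

    # Sketch only: highlight and TerminalFormatter are standard Pygments
    # APIs; the lexer import path is an assumption.
    from pygments import highlight
    from pygments.formatters import TerminalFormatter
    from IPython.nbconvert.utils.lexers import IPyLexer

    session = "In [1]: a = 'foo'\nOut[1]: 'foo'\n"
    # The 'In [1]:' prompt makes IPyLexer delegate to the console lexer;
    # text without a prompt would go to the plain IPython lexer.
    print(highlight(session, IPyLexer(), TerminalFormatter()))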
@@ -1,94 +1,103 @@
1 """Test lexers module"""
1 """Test lexers module"""
2 #-----------------------------------------------------------------------------
2 #-----------------------------------------------------------------------------
3 # Copyright (C) 2014 The IPython Development Team
3 # Copyright (C) 2014 The IPython Development Team
4 #
4 #
5 # Distributed under the terms of the BSD License. The full license is in
5 # Distributed under the terms of the BSD License. The full license is in
6 # the file COPYING, distributed as part of this software.
6 # the file COPYING, distributed as part of this software.
7 #-----------------------------------------------------------------------------
7 #-----------------------------------------------------------------------------
8
8
9 #-----------------------------------------------------------------------------
9 #-----------------------------------------------------------------------------
10 # Imports
10 # Imports
11 #-----------------------------------------------------------------------------
11 #-----------------------------------------------------------------------------
12 from pygments.token import Token
12 from pygments.token import Token
13
13
14 from IPython.nbconvert.tests.base import TestsBase
14 from IPython.nbconvert.tests.base import TestsBase
15 from .. import lexers
15 from .. import lexers
16
16
17
17
18 #-----------------------------------------------------------------------------
18 #-----------------------------------------------------------------------------
19 # Classes and functions
19 # Classes and functions
20 #-----------------------------------------------------------------------------
20 #-----------------------------------------------------------------------------
21 class TestLexers(TestsBase):
21 class TestLexers(TestsBase):
22 """Collection of lexers tests"""
22 """Collection of lexers tests"""
23 def setUp(self):
23 def setUp(self):
24 self.lexer = lexers.IPythonLexer()
24 self.lexer = lexers.IPythonLexer()
25
25
26 def testIPythonLexer(self):
26 def testIPythonLexer(self):
27 fragment = '!echo $HOME\n'
27 fragment = '!echo $HOME\n'
28 tokens = [
28 tokens = [
29 (Token.Operator, '!'),
29 (Token.Operator, '!'),
30 (Token.Name.Builtin, 'echo'),
30 (Token.Name.Builtin, 'echo'),
31 (Token.Text, ' '),
31 (Token.Text, ' '),
32 (Token.Name.Variable, '$HOME'),
32 (Token.Name.Variable, '$HOME'),
33 (Token.Text, '\n'),
33 (Token.Text, '\n'),
34 ]
34 ]
35 self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
35 self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
36
36
37 fragment_2 = '!' + fragment
37 fragment_2 = '!' + fragment
38 tokens_2 = [
38 tokens_2 = [
39 (Token.Operator, '!!'),
39 (Token.Operator, '!!'),
40 ] + tokens[1:]
40 ] + tokens[1:]
41 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
41 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
42
42
43 fragment_2 = 'x = ' + fragment
43 fragment_2 = 'x = ' + fragment
44 tokens_2 = [
44 tokens_2 = [
45 (Token.Name, 'x'),
45 (Token.Name, 'x'),
46 (Token.Text, ' '),
46 (Token.Text, ' '),
47 (Token.Operator, '='),
47 (Token.Operator, '='),
48 (Token.Text, ' '),
48 (Token.Text, ' '),
49 ] + tokens
49 ] + tokens
50 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
50 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
51
51
52 fragment_2 = 'x, = ' + fragment
52 fragment_2 = 'x, = ' + fragment
53 tokens_2 = [
53 tokens_2 = [
54 (Token.Name, 'x'),
54 (Token.Name, 'x'),
55 (Token.Punctuation, ','),
55 (Token.Punctuation, ','),
56 (Token.Text, ' '),
56 (Token.Text, ' '),
57 (Token.Operator, '='),
57 (Token.Operator, '='),
58 (Token.Text, ' '),
58 (Token.Text, ' '),
59 ] + tokens
59 ] + tokens
60 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
60 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
61
61
62 fragment_2 = 'x, = %sx ' + fragment[1:]
62 fragment_2 = 'x, = %sx ' + fragment[1:]
63 tokens_2 = [
63 tokens_2 = [
64 (Token.Name, 'x'),
64 (Token.Name, 'x'),
65 (Token.Punctuation, ','),
65 (Token.Punctuation, ','),
66 (Token.Text, ' '),
66 (Token.Text, ' '),
67 (Token.Operator, '='),
67 (Token.Operator, '='),
68 (Token.Text, ' '),
68 (Token.Text, ' '),
69 (Token.Operator, '%'),
69 (Token.Operator, '%'),
70 (Token.Keyword, 'sx'),
70 (Token.Keyword, 'sx'),
71 (Token.Text, ' '),
71 (Token.Text, ' '),
72 ] + tokens[1:]
72 ] + tokens[1:]
73 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
73 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
74
74
75 fragment_2 = 'f = %R function () {}\n'
75 fragment_2 = 'f = %R function () {}\n'
76 tokens_2 = [
76 tokens_2 = [
77 (Token.Name, 'f'),
77 (Token.Name, 'f'),
78 (Token.Text, ' '),
78 (Token.Text, ' '),
79 (Token.Operator, '='),
79 (Token.Operator, '='),
80 (Token.Text, ' '),
80 (Token.Text, ' '),
81 (Token.Operator, '%'),
81 (Token.Operator, '%'),
82 (Token.Keyword, 'R'),
82 (Token.Keyword, 'R'),
83 (Token.Text, ' function () {}\n'),
83 (Token.Text, ' function () {}\n'),
84 ]
84 ]
85 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
85 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
86
86
87 fragment_2 = '\t%%xyz\n$foo\n'
87 fragment_2 = '\t%%xyz\n$foo\n'
88 tokens_2 = [
88 tokens_2 = [
89 (Token.Text, '\t'),
89 (Token.Text, '\t'),
90 (Token.Operator, '%%'),
90 (Token.Operator, '%%'),
91 (Token.Keyword, 'xyz'),
91 (Token.Keyword, 'xyz'),
92 (Token.Text, '\n$foo\n'),
92 (Token.Text, '\n$foo\n'),
93 ]
93 ]
94 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
94 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
95
96 fragment_2 = '%system?\n'
97 tokens_2 = [
98 (Token.Operator, '%'),
99 (Token.Keyword, 'system'),
100 (Token.Operator, '?'),
101 (Token.Text, '\n'),
102 ]
103 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
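The added test covers the `%magic?` form; the second new rule handles a bare trailing `?`/`??` after an object name. It can be checked the same way; a sketch, with the expected token inferred from the regex rather than taken from this commit's tests, and the import path assumed as before:

    # Sketch only: the `\b(\?\??)(\s*)$` rule should emit the trailing
    # `??` as an Operator token. Import path is an assumption.
    from pygments.token import Token
    from IPython.nbconvert.utils import lexers

    tokens = list(lexers.IPythonLexer().get_tokens('a??\n'))
    assert (Token.Operator, '??') in tokens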