support 'foo*??' in lexer...
Min RK
@@ -1,510 +1,511 @@
# -*- coding: utf-8 -*-
"""
Defines a variety of Pygments lexers for highlighting IPython code.

This includes:

    IPythonLexer, IPython3Lexer
        Lexers for pure IPython (python + magic/shell commands)

    IPythonPartialTracebackLexer, IPythonTracebackLexer
        Supports 2.x and 3.x via keyword `python3`. The partial traceback
        lexer reads everything but the Python code appearing in a traceback.
        The full lexer combines the partial lexer with an IPython lexer.

    IPythonConsoleLexer
        A lexer for IPython console sessions, with support for tracebacks.

    IPyLexer
        A friendly lexer which examines the first line of text and from it,
        decides whether to use an IPython lexer or an IPython console lexer.
        This is probably the only lexer that needs to be explicitly added
        to Pygments.

"""
#-----------------------------------------------------------------------------
# Copyright (c) 2013, the IPython Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------

# Standard library
import re

# Third party
from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
from pygments.lexer import (
    Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
)
from pygments.token import (
    Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
)
from pygments.util import get_bool_opt

# Local

line_re = re.compile('.*?\n')

__all__ = ['build_ipy_lexer', 'IPython3Lexer', 'IPythonLexer',
           'IPythonPartialTracebackLexer', 'IPythonTracebackLexer',
           'IPythonConsoleLexer', 'IPyLexer']
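
# --- Editor's example (not part of the commit): rendering a session with
# IPyLexer, the one lexer the docstring above suggests registering with
# Pygments. The import path is an assumption based on where this module
# lives in IPython.
from pygments import highlight
from pygments.formatters import HtmlFormatter
from IPython.lib.lexers import IPyLexer

html = highlight("In [1]: a = 'foo'\nOut[1]: 'foo'\n", IPyLexer(), HtmlFormatter())
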
ipython_tokens = [
    (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
    (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))),
    (r"(%%?)(\w+)(\?\??)$", bygroups(Operator, Keyword, Operator)),
    (r"\b(\?\??)(\s*)$", bygroups(Operator, Text)),
    (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
                                            using(BashLexer), Text)),
    (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
    (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
    (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
    (r'^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)', bygroups(Text, Operator, Text)),
+   (r'(\s*%{0,2}[\w\.\*]*)(\?\??)(\s*)$', bygroups(Text, Operator, Text)),
]
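
# --- Editor's example (not part of the commit): the last two rules pair up
# leading and trailing help queries. The import path is an assumption, and
# the expected tokens follow from the regex groups above.
from pygments.token import Token
from IPython.lib.lexers import IPythonLexer

lexer = IPythonLexer()
# Leading '?' (existing rule): '?' is the Operator, 'foo*' is Text.
assert (Token.Operator, '?') in lexer.get_tokens('?foo*\n')
# Trailing '??' (the rule added here): 'foo*' is Text, '??' the Operator.
assert (Token.Operator, '??') in lexer.get_tokens('foo*??\n')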

def build_ipy_lexer(python3):
    """Builds IPython lexers depending on the value of `python3`.

    The lexer inherits from an appropriate Python lexer and then adds
    information about IPython specific keywords (i.e. magic commands,
    shell commands, etc.)

    Parameters
    ----------
    python3 : bool
        If `True`, then build an IPython lexer from a Python 3 lexer.

    """
    # It would be nice to have a single IPython lexer class which takes
    # a boolean `python3`. But since there are two Python lexer classes,
    # we will also have two IPython lexer classes.
    if python3:
        PyLexer = Python3Lexer
        clsname = 'IPython3Lexer'
        name = 'IPython3'
        aliases = ['ipython3']
        doc = """IPython3 Lexer"""
    else:
        PyLexer = PythonLexer
        clsname = 'IPythonLexer'
        name = 'IPython'
        aliases = ['ipython2', 'ipython']
        doc = """IPython Lexer"""

    tokens = PyLexer.tokens.copy()
    tokens['root'] = ipython_tokens + tokens['root']

    attrs = {'name': name, 'aliases': aliases, 'filenames': [],
             '__doc__': doc, 'tokens': tokens}

    return type(name, (PyLexer,), attrs)


IPython3Lexer = build_ipy_lexer(python3=True)
IPythonLexer = build_ipy_lexer(python3=False)
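
# --- Editor's note (not part of the commit): the classes produced by type()
# above carry the attributes passed in `attrs`; run in this module's
# namespace:
assert IPython3Lexer.name == 'IPython3'
assert IPythonLexer.aliases == ['ipython2', 'ipython']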


class IPythonPartialTracebackLexer(RegexLexer):
    """
    Partial lexer for IPython tracebacks.

    Handles all the non-python output. This works for both Python 2.x and 3.x.

    """
    name = 'IPython Partial Traceback'

    tokens = {
        'root': [
            # Tracebacks for syntax errors have a different style.
            # For both types of tracebacks, we mark the first line with
            # Generic.Traceback. For syntax errors, we mark the filename
            # as we mark the filenames for non-syntax tracebacks.
            #
            # These two regexps define how IPythonConsoleLexer finds a
            # traceback.
            #
            ## Non-syntax traceback
            (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
            ## Syntax traceback
            (r'^(  File)(.*)(, line )(\d+\n)',
             bygroups(Generic.Traceback, Name.Namespace,
                      Generic.Traceback, Literal.Number.Integer)),

            # (Exception Identifier)(Whitespace)(Traceback Message)
            (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
             bygroups(Name.Exception, Generic.Whitespace, Text)),
            # (Module/Filename)(Text)(Callee)(Function Signature)
            # Better options for callee and function signature?
            (r'(.*)( in )(.*)(\(.*\)\n)',
             bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
            # Regular line: (Whitespace)(Line Number)(Python Code)
            (r'(\s*?)(\d+)(.*?\n)',
             bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
            # Emphasized line: (Arrow)(Line Number)(Python Code)
            # Using Exception token so arrow color matches the Exception.
            (r'(-*>?\s?)(\d+)(.*?\n)',
             bygroups(Name.Exception, Literal.Number.Integer, Other)),
            # (Exception Identifier)(Message)
            (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
             bygroups(Name.Exception, Text)),
            # Tag everything else as Other, will be handled later.
            (r'.*\n', Other),
        ],
    }
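
# --- Editor's example (not part of the commit): the first rule tags the
# divider line that opens a non-syntax traceback; run in this module's
# namespace:
toks = list(IPythonPartialTracebackLexer().get_tokens_unprocessed('-' * 75 + '\n'))
assert toks[0][1] is Generic.Traceback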


class IPythonTracebackLexer(DelegatingLexer):
    """
    IPython traceback lexer.

    For doctests, the tracebacks can be snipped as much as desired with the
    exception of the lines that designate a traceback. For non-syntax error
    tracebacks, this is the line of hyphens. For syntax error tracebacks,
    this is the line which lists the File and line number.

    """
    # The lexer inherits from DelegatingLexer. The "root" lexer is an
    # appropriate IPython lexer, which depends on the value of the boolean
    # `python3`. First, we parse with the partial IPython traceback lexer.
    # Then, any code marked with the "Other" token is delegated to the root
    # lexer.
    #
    name = 'IPython Traceback'
    aliases = ['ipythontb']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3tb']
        else:
            self.aliases = ['ipython2tb', 'ipythontb']

        if self.python3:
            IPyLexer = IPython3Lexer
        else:
            IPyLexer = IPythonLexer

        DelegatingLexer.__init__(self, IPyLexer,
                                 IPythonPartialTracebackLexer, **options)
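
# --- Editor's example (not part of the commit): the `python3` option picks
# the root lexer and narrows the advertised aliases; run in this module's
# namespace:
assert IPythonTracebackLexer(python3=True).aliases == ['ipython3tb']
assert IPythonTracebackLexer().aliases == ['ipython2tb', 'ipythontb']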

class IPythonConsoleLexer(Lexer):
    """
    An IPython console lexer for IPython code-blocks and doctests, such as:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: a = 'foo'

            In [2]: a
            Out[2]: 'foo'

            In [3]: print a
            foo

            In [4]: 1 / 0


    Support is also provided for IPython exceptions:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: raise Exception

            ---------------------------------------------------------------------------
            Exception                                 Traceback (most recent call last)
            <ipython-input-1-fca2ab0ca76b> in <module>()
            ----> 1 raise Exception

            Exception:

    """
    name = 'IPython console session'
    aliases = ['ipythonconsole']
    mimetypes = ['text/x-ipython-console']

    # The regexps used to determine what is input and what is output.
    # The default prompts for IPython are:
    #
    #     c.PromptManager.in_template  = 'In [\#]: '
    #     c.PromptManager.in2_template = '   .\D.: '
    #     c.PromptManager.out_template = 'Out[\#]: '
    #
    in1_regex = r'In \[[0-9]+\]: '
    in2_regex = r'   \.\.+\.: '
    out_regex = r'Out\[[0-9]+\]: '

    #: The regex to determine when a traceback starts.
    ipytb_start = re.compile(r'^(\^C)?(-+\n)|^(  File)(.*)(, line )(\d+\n)')

    def __init__(self, **options):
        """Initialize the IPython console lexer.

        Parameters
        ----------
        python3 : bool
            If `True`, then the console inputs are parsed using a Python 3
            lexer. Otherwise, they are parsed using a Python 2 lexer.
        in1_regex : str
            The regular expression used to detect the start of inputs.
            Although the IPython configuration setting may have trailing
            whitespace, do not include it in the regex. If `None`, then the
            default input prompt is assumed.
        in2_regex : str
            The regular expression used to detect the continuation of inputs.
            Although the IPython configuration setting may have trailing
            whitespace, do not include it in the regex. If `None`, then the
            default continuation prompt is assumed.
        out_regex : str
            The regular expression used to detect outputs. If `None`, then
            the default output prompt is assumed.

        """
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3console']
        else:
            self.aliases = ['ipython2console', 'ipythonconsole']

        in1_regex = options.get('in1_regex', self.in1_regex)
        in2_regex = options.get('in2_regex', self.in2_regex)
        out_regex = options.get('out_regex', self.out_regex)

        # So that we can work with input and output prompts which have been
        # rstrip'd (possibly by editors) we also need rstrip'd variants. If
        # we do not do this, then such prompts will be tagged as 'output'.
        # The reason we can't just use the rstrip'd variants instead is that
        # we want any whitespace associated with the prompt to be inserted
        # with the token. This allows formatted code to be modified so as to
        # hide the appearance of prompts, with the whitespace included. One
        # example use of this is in copybutton.js from the standard lib
        # Python docs.
        in1_regex_rstrip = in1_regex.rstrip() + '\n'
        in2_regex_rstrip = in2_regex.rstrip() + '\n'
        out_regex_rstrip = out_regex.rstrip() + '\n'

        # Compile and save them all.
        attrs = ['in1_regex', 'in2_regex', 'out_regex',
                 'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
        for attr in attrs:
            self.__setattr__(attr, re.compile(locals()[attr]))

        Lexer.__init__(self, **options)

        if self.python3:
            pylexer = IPython3Lexer
            tblexer = IPythonTracebackLexer
        else:
            pylexer = IPythonLexer
            tblexer = IPythonTracebackLexer

        self.pylexer = pylexer(**options)
        self.tblexer = tblexer(**options)

        self.reset()
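
    # --- Editor's note (not part of the commit): because __init__ rstrips
    # and re.compile()s the options itself, in1_regex/in2_regex/out_regex
    # must be passed as regex *strings*, e.g. for a made-up 'In> ' prompt:
    #
    #   >>> lexer = IPythonConsoleLexer(in1_regex=r'In> ')
    #   >>> bool(lexer.in1_regex.match('In> x = 1'))
    #   True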

    def reset(self):
        self.mode = 'output'
        self.index = 0
        self.buffer = u''
        self.insertions = []

    def buffered_tokens(self):
        """
        Generator of unprocessed tokens after doing insertions and before
        changing to a new state.

        """
        if self.mode == 'output':
            tokens = [(0, Generic.Output, self.buffer)]
        elif self.mode == 'input':
            tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
        else: # traceback
            tokens = self.tblexer.get_tokens_unprocessed(self.buffer)

        for i, t, v in do_insertions(self.insertions, tokens):
            # All token indexes are relative to the buffer.
            yield self.index + i, t, v

        # Clear it all
        self.index += len(self.buffer)
        self.buffer = u''
        self.insertions = []
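
    # --- Editor's note (not part of the commit): `self.index` accumulates
    # the length of every buffer flushed above, so the yielded indexes stay
    # absolute offsets into the whole document rather than offsets into the
    # current buffer.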

    def get_mci(self, line):
        """
        Parses the line and returns a 3-tuple: (mode, code, insertion).

        `mode` is the next mode (or state) of the lexer, and is always equal
        to 'input', 'output', or 'tb'.

        `code` is a portion of the line that should be added to the buffer
        corresponding to the next mode and eventually lexed by another lexer.
        For example, `code` could be Python code if `mode` were 'input'.

        `insertion` is a 3-tuple (index, token, text) representing an
        unprocessed "token" that will be inserted into the stream of tokens
        that are created from the buffer once we change modes. This is usually
        the input or output prompt.

        In general, the next mode depends on the current mode and on the
        contents of `line`.

        """
        # To reduce the number of regex match checks, we have multiple
        # 'if' blocks instead of 'if-elif' blocks.

        # Check for possible end of input
        in2_match = self.in2_regex.match(line)
        in2_match_rstrip = self.in2_regex_rstrip.match(line)
        if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
           in2_match_rstrip:
            end_input = True
        else:
            end_input = False
        if end_input and self.mode != 'tb':
            # Only look for an end of input when not in tb mode.
            # An ellipsis could appear within the traceback.
            mode = 'output'
            code = u''
            insertion = (0, Generic.Prompt, line)
            return mode, code, insertion

        # Check for output prompt
        out_match = self.out_regex.match(line)
        out_match_rstrip = self.out_regex_rstrip.match(line)
        if out_match or out_match_rstrip:
            mode = 'output'
            if out_match:
                idx = out_match.end()
            else:
                idx = out_match_rstrip.end()
            code = line[idx:]
            # Use the 'heading' token for output. We cannot use Generic.Error
            # since it would conflict with exceptions.
            insertion = (0, Generic.Heading, line[:idx])
            return mode, code, insertion


        # Check for input or continuation prompt (non stripped version)
        in1_match = self.in1_regex.match(line)
        if in1_match or (in2_match and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match:
                idx = in1_match.end()
            else: # in2_match
                idx = in2_match.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for input or continuation prompt (stripped version)
        in1_match_rstrip = self.in1_regex_rstrip.match(line)
        if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match_rstrip:
                idx = in1_match_rstrip.end()
            else: # in2_match
                idx = in2_match_rstrip.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for traceback
        if self.ipytb_start.match(line):
            mode = 'tb'
            code = line
            insertion = None
            return mode, code, insertion

        # All other stuff...
        if self.mode in ('input', 'output'):
            # We assume all other text is output. Multiline input that
            # does not use the continuation marker cannot be detected.
            # For example, the 3 in the following is clearly output:
            #
            #    In [1]: print 3
            #    3
            #
            # But the following second line is part of the input:
            #
            #    In [2]: while True:
            #        print True
            #
            # In both cases, the 2nd line will be 'output'.
            #
            mode = 'output'
        else:
            mode = 'tb'

        code = line
        insertion = None

        return mode, code, insertion
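
    # --- Editor's note (not part of the commit): for a freshly reset lexer,
    # a plain input line splits like this:
    #
    #   >>> IPythonConsoleLexer().get_mci('In [1]: a = 1\n')
    #   ('input', 'a = 1\n', (0, Generic.Prompt, 'In [1]: '))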

    def get_tokens_unprocessed(self, text):
        self.reset()
        for match in line_re.finditer(text):
            line = match.group()
            mode, code, insertion = self.get_mci(line)

            if mode != self.mode:
                # Yield buffered tokens before transitioning to new mode.
                for token in self.buffered_tokens():
                    yield token
                self.mode = mode

            if insertion:
                self.insertions.append((len(self.buffer), [insertion]))
            self.buffer += code
        else:
            for token in self.buffered_tokens():
                yield token
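
# --- Editor's example (not part of the commit): prompts come back as
# Generic.Prompt / Generic.Heading tokens wrapped around the lexed code;
# run in this module's namespace:
session = "In [1]: a = 'foo'\nOut[1]: 'foo'\n"
toks = list(IPythonConsoleLexer().get_tokens(session))
assert (Generic.Prompt, 'In [1]: ') in toks
assert (Generic.Heading, 'Out[1]: ') in toks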

class IPyLexer(Lexer):
    """
    Primary lexer for all IPython-like code.

    This is a simple helper lexer. If the first line of the text begins with
    "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
    lexer. If not, then the entire text is parsed with an IPython lexer.

    The goal is to reduce the number of lexers that are registered
    with Pygments.

    """
    name = 'IPy session'
    aliases = ['ipy']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipy3']
        else:
            self.aliases = ['ipy2', 'ipy']

        Lexer.__init__(self, **options)

        self.IPythonLexer = IPythonLexer(**options)
        self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

    def get_tokens_unprocessed(self, text):
        # Search for the input prompt anywhere...this allows code blocks to
        # begin with comments as well.
        if re.match(r'.*(In \[[0-9]+\]:)', text.strip(), re.DOTALL):
            lex = self.IPythonConsoleLexer
        else:
            lex = self.IPythonLexer
        for token in lex.get_tokens_unprocessed(text):
            yield token
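
# --- Editor's example (not part of the commit): IPyLexer dispatch. Text
# containing an 'In [N]:' prompt anywhere is routed to the console lexer;
# anything else goes to the pure IPython lexer. Run in this module's
# namespace:
toks = list(IPyLexer().get_tokens("# comment\nIn [1]: a = 'foo'\n"))
assert (Generic.Prompt, 'In [1]: ') in toks
toks = list(IPyLexer().get_tokens("%timeit a = 1\n"))
assert (Operator, '%') in toks and (Keyword, 'timeit') in toks
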
@@ -1,122 +1,130 @@
1 """Test lexers module"""
1 """Test lexers module"""
2
2
3 # Copyright (c) IPython Development Team.
3 # Copyright (c) IPython Development Team.
4 # Distributed under the terms of the Modified BSD License.
4 # Distributed under the terms of the Modified BSD License.
5
5
6 from unittest import TestCase
6 from unittest import TestCase
7 from pygments.token import Token
7 from pygments.token import Token
8
8
9 from .. import lexers
9 from .. import lexers
10
10
11
11
12 class TestLexers(TestCase):
12 class TestLexers(TestCase):
13 """Collection of lexers tests"""
13 """Collection of lexers tests"""
14 def setUp(self):
14 def setUp(self):
15 self.lexer = lexers.IPythonLexer()
15 self.lexer = lexers.IPythonLexer()

    def testIPythonLexer(self):
        fragment = '!echo $HOME\n'
        tokens = [
            (Token.Operator, '!'),
            (Token.Name.Builtin, 'echo'),
            (Token.Text, ' '),
            (Token.Name.Variable, '$HOME'),
            (Token.Text, '\n'),
        ]
        self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))

        fragment_2 = '!' + fragment
        tokens_2 = [
            (Token.Operator, '!!'),
        ] + tokens[1:]
        self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

        fragment_2 = '\t %%!\n' + fragment[1:]
        tokens_2 = [
            (Token.Text, '\t '),
            (Token.Operator, '%%!'),
            (Token.Text, '\n'),
        ] + tokens[1:]
        self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

        fragment_2 = 'x = ' + fragment
        tokens_2 = [
            (Token.Name, 'x'),
            (Token.Text, ' '),
            (Token.Operator, '='),
            (Token.Text, ' '),
        ] + tokens
        self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

        fragment_2 = 'x, = ' + fragment
        tokens_2 = [
            (Token.Name, 'x'),
            (Token.Punctuation, ','),
            (Token.Text, ' '),
            (Token.Operator, '='),
            (Token.Text, ' '),
        ] + tokens
        self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

        fragment_2 = 'x, = %sx ' + fragment[1:]
        tokens_2 = [
            (Token.Name, 'x'),
            (Token.Punctuation, ','),
            (Token.Text, ' '),
            (Token.Operator, '='),
            (Token.Text, ' '),
            (Token.Operator, '%'),
            (Token.Keyword, 'sx'),
            (Token.Text, ' '),
        ] + tokens[1:]
        self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

        fragment_2 = 'f = %R function () {}\n'
        tokens_2 = [
            (Token.Name, 'f'),
            (Token.Text, ' '),
            (Token.Operator, '='),
            (Token.Text, ' '),
            (Token.Operator, '%'),
            (Token.Keyword, 'R'),
            (Token.Text, ' function () {}\n'),
        ]
        self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

        fragment_2 = '\t%%xyz\n$foo\n'
        tokens_2 = [
            (Token.Text, '\t'),
            (Token.Operator, '%%'),
            (Token.Keyword, 'xyz'),
            (Token.Text, '\n$foo\n'),
        ]
        self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

        fragment_2 = '%system?\n'
        tokens_2 = [
            (Token.Operator, '%'),
            (Token.Keyword, 'system'),
            (Token.Operator, '?'),
            (Token.Text, '\n'),
        ]
        self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

        fragment_2 = 'x != y\n'
        tokens_2 = [
            (Token.Name, 'x'),
            (Token.Text, ' '),
            (Token.Operator, '!='),
            (Token.Text, ' '),
            (Token.Name, 'y'),
            (Token.Text, '\n'),
        ]
        self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

        fragment_2 = ' ?math.sin\n'
        tokens_2 = [
            (Token.Text, ' '),
            (Token.Operator, '?'),
            (Token.Text, 'math.sin'),
            (Token.Text, '\n'),
        ]
        self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
+
+        fragment = ' *int*?\n'
+        tokens = [
+            (Token.Text, ' *int*'),
+            (Token.Operator, '?'),
+            (Token.Text, '\n'),
+        ]
+        self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
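
    # --- Editor's sketch (not part of the commit): the commit title's own
    # example, 'foo*??', exercises the same new rule. The method name is
    # hypothetical and the expected tokens are derived from the regex groups,
    # in the style of the test above.
    def testIPythonLexerTrailingHelp(self):
        fragment = 'foo*??\n'
        tokens = [
            (Token.Text, 'foo*'),
            (Token.Operator, '??'),
            (Token.Text, '\n'),
        ]
        self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))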