Add ipython_tokens for syntax highlighting following cell magic
Jörg Dietrich
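What the change does, in practice: with the new rules, the body of a recognized cell magic (e.g. %%javascript, %%latex, %%ruby) is delegated to the matching Pygments lexer instead of being treated as plain text. A minimal sketch of the effect, assuming the file under review is IPython/lib/lexers.py and Pygments is installed (the cell content is only an illustration):

# Sketch: highlight a %%javascript cell with the updated IPython lexer.
from pygments import highlight
from pygments.formatters import TerminalFormatter
from IPython.lib.lexers import IPythonLexer

cell = "%%javascript\nconsole.log('now lexed as JavaScript');\n"
# The first line is tokenized as a cell magic; the body is handed to JavascriptLexer.
print(highlight(cell, IPythonLexer(), TerminalFormatter()))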
@@ -1,512 +1,531 @@
# -*- coding: utf-8 -*-
"""
Defines a variety of Pygments lexers for highlighting IPython code.

This includes:

    IPythonLexer, IPython3Lexer
        Lexers for pure IPython (python + magic/shell commands)

    IPythonPartialTracebackLexer, IPythonTracebackLexer
        Supports 2.x and 3.x via keyword `python3`. The partial traceback
        lexer reads everything but the Python code appearing in a traceback.
        The full lexer combines the partial lexer with an IPython lexer.

    IPythonConsoleLexer
        A lexer for IPython console sessions, with support for tracebacks.

    IPyLexer
        A friendly lexer which examines the first line of text and from it,
        decides whether to use an IPython lexer or an IPython console lexer.
        This is probably the only lexer that needs to be explicitly added
        to Pygments.

"""
#-----------------------------------------------------------------------------
# Copyright (c) 2013, the IPython Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------

# Standard library
import re

# Third party
-from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
+from pygments.lexers import (
+    BashLexer, HtmlLexer, JavascriptLexer, RubyLexer, PerlLexer, PythonLexer,
+    Python3Lexer, TexLexer)
from pygments.lexer import (
    Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
)
from pygments.token import (
    Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
)
from pygments.util import get_bool_opt

# Local

line_re = re.compile('.*?\n')

__all__ = ['build_ipy_lexer', 'IPython3Lexer', 'IPythonLexer',
           'IPythonPartialTracebackLexer', 'IPythonTracebackLexer',
           'IPythonConsoleLexer', 'IPyLexer']

-ipython_tokens = [
-    (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
-    (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))),
-    (r"(%%?)(\w+)(\?\??)$", bygroups(Operator, Keyword, Operator)),
-    (r"\b(\?\??)(\s*)$", bygroups(Operator, Text)),
-    (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
-                                            using(BashLexer), Text)),
-    (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
-    (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
-    (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
-    (r'^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)', bygroups(Text, Operator, Text)),
-    (r'(\s*%{0,2}[\w\.\*]*)(\?\??)(\s*)$', bygroups(Text, Operator, Text)),
-]

def build_ipy_lexer(python3):
    """Builds IPython lexers depending on the value of `python3`.

    The lexer inherits from an appropriate Python lexer and then adds
    information about IPython specific keywords (i.e. magic commands,
    shell commands, etc.)

    Parameters
    ----------
    python3 : bool
        If `True`, then build an IPython lexer from a Python 3 lexer.

    """
    # It would be nice to have a single IPython lexer class which takes
    # a boolean `python3`. But since there are two Python lexer classes,
    # we will also have two IPython lexer classes.
    if python3:
        PyLexer = Python3Lexer
        name = 'IPython3'
        aliases = ['ipython3']
        doc = """IPython3 Lexer"""
    else:
        PyLexer = PythonLexer
        name = 'IPython'
        aliases = ['ipython2', 'ipython']
        doc = """IPython Lexer"""

+    ipython_tokens = [
+        (r'(?s)(\s*)(%%capture)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
+        (r'(?s)(\s*)(%%debug)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
+        (r'(?is)(\s*)(%%html)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(HtmlLexer))),
+        (r'(?s)(\s*)(%%javascript)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(JavascriptLexer))),
+        (r'(?s)(\s*)(%%js)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(JavascriptLexer))),
+        (r'(?s)(\s*)(%%latex)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(TexLexer))),
+        (r'(?s)(\s*)(%%perl)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PerlLexer))),
+        (r'(?s)(\s*)(%%prun)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
+        (r'(?s)(\s*)(%%pypy)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
+        (r'(?s)(\s*)(%%python)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
+        (r'(?s)(\s*)(%%python2)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PythonLexer))),
+        (r'(?s)(\s*)(%%python3)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(Python3Lexer))),
+        (r'(?s)(\s*)(%%ruby)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(RubyLexer))),
+        (r'(?s)(\s*)(%%time)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
+        (r'(?s)(\s*)(%%timeit)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
+        (r'(?s)(\s*)(%%writefile)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
+        (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
+        (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))),
+        (r"(%%?)(\w+)(\?\??)$", bygroups(Operator, Keyword, Operator)),
+        (r"\b(\?\??)(\s*)$", bygroups(Operator, Text)),
+        (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
+                                                using(BashLexer), Text)),
+        (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
+        (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
+        (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
+        (r'^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)', bygroups(Text, Operator, Text)),
+        (r'(\s*%{0,2}[\w\.\*]*)(\?\??)(\s*)$', bygroups(Text, Operator, Text)),
+    ]
+
    tokens = PyLexer.tokens.copy()
    tokens['root'] = ipython_tokens + tokens['root']

    attrs = {'name': name, 'aliases': aliases, 'filenames': [],
             '__doc__': doc, 'tokens': tokens}

    return type(name, (PyLexer,), attrs)


IPython3Lexer = build_ipy_lexer(python3=True)
IPythonLexer = build_ipy_lexer(python3=False)


class IPythonPartialTracebackLexer(RegexLexer):
    """
    Partial lexer for IPython tracebacks.

    Handles all the non-python output. This works for both Python 2.x and 3.x.

    """
    name = 'IPython Partial Traceback'

    tokens = {
        'root': [
            # Tracebacks for syntax errors have a different style.
            # For both types of tracebacks, we mark the first line with
            # Generic.Traceback. For syntax errors, we mark the filename
            # as we mark the filenames for non-syntax tracebacks.
            #
            # These two regexps define how IPythonConsoleLexer finds a
            # traceback.
            #
            ## Non-syntax traceback
            (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
            ## Syntax traceback
            (r'^(  File)(.*)(, line )(\d+\n)',
             bygroups(Generic.Traceback, Name.Namespace,
                      Generic.Traceback, Literal.Number.Integer)),

            # (Exception Identifier)(Whitespace)(Traceback Message)
            (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
             bygroups(Name.Exception, Generic.Whitespace, Text)),
            # (Module/Filename)(Text)(Callee)(Function Signature)
            # Better options for callee and function signature?
            (r'(.*)( in )(.*)(\(.*\)\n)',
             bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
            # Regular line: (Whitespace)(Line Number)(Python Code)
            (r'(\s*?)(\d+)(.*?\n)',
             bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
            # Emphasized line: (Arrow)(Line Number)(Python Code)
            # Using Exception token so arrow color matches the Exception.
            (r'(-*>?\s?)(\d+)(.*?\n)',
             bygroups(Name.Exception, Literal.Number.Integer, Other)),
            # (Exception Identifier)(Message)
            (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
             bygroups(Name.Exception, Text)),
            # Tag everything else as Other, will be handled later.
            (r'.*\n', Other),
        ],
    }


class IPythonTracebackLexer(DelegatingLexer):
    """
    IPython traceback lexer.

    For doctests, the tracebacks can be snipped as much as desired with the
    exception to the lines that designate a traceback. For non-syntax error
    tracebacks, this is the line of hyphens. For syntax error tracebacks,
    this is the line which lists the File and line number.

    """
    # The lexer inherits from DelegatingLexer. The "root" lexer is an
    # appropriate IPython lexer, which depends on the value of the boolean
    # `python3`. First, we parse with the partial IPython traceback lexer.
    # Then, any code marked with the "Other" token is delegated to the root
    # lexer.
    #
    name = 'IPython Traceback'
    aliases = ['ipythontb']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3tb']
        else:
            self.aliases = ['ipython2tb', 'ipythontb']

        if self.python3:
            IPyLexer = IPython3Lexer
        else:
            IPyLexer = IPythonLexer

        DelegatingLexer.__init__(self, IPyLexer,
                                 IPythonPartialTracebackLexer, **options)

class IPythonConsoleLexer(Lexer):
    """
    An IPython console lexer for IPython code-blocks and doctests, such as:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: a = 'foo'

            In [2]: a
            Out[2]: 'foo'

            In [3]: print a
            foo

            In [4]: 1 / 0


    Support is also provided for IPython exceptions:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: raise Exception

            ---------------------------------------------------------------------------
            Exception                                 Traceback (most recent call last)
            <ipython-input-1-fca2ab0ca76b> in <module>()
            ----> 1 raise Exception

            Exception:

    """
    name = 'IPython console session'
    aliases = ['ipythonconsole']
    mimetypes = ['text/x-ipython-console']

    # The regexps used to determine what is input and what is output.
    # The default prompts for IPython are:
    #
    #     in           = 'In [#]: '
    #     continuation = '   .D.: '
    #     template     = 'Out[#]: '
    #
    # Where '#' is the 'prompt number' or 'execution count' and 'D'
    # is a number of dots matching the width of the execution count
    #
    in1_regex = r'In \[[0-9]+\]: '
    in2_regex = r'   \.\.+\.: '
    out_regex = r'Out\[[0-9]+\]: '

    #: The regex to determine when a traceback starts.
    ipytb_start = re.compile(r'^(\^C)?(-+\n)|^(  File)(.*)(, line )(\d+\n)')

    def __init__(self, **options):
        """Initialize the IPython console lexer.

        Parameters
        ----------
        python3 : bool
            If `True`, then the console inputs are parsed using a Python 3
            lexer. Otherwise, they are parsed using a Python 2 lexer.
        in1_regex : RegexObject
            The compiled regular expression used to detect the start
            of inputs. Although the IPython configuration setting may have a
            trailing whitespace, do not include it in the regex. If `None`,
            then the default input prompt is assumed.
        in2_regex : RegexObject
            The compiled regular expression used to detect the continuation
            of inputs. Although the IPython configuration setting may have a
            trailing whitespace, do not include it in the regex. If `None`,
            then the default input prompt is assumed.
        out_regex : RegexObject
            The compiled regular expression used to detect outputs. If `None`,
            then the default output prompt is assumed.

        """
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3console']
        else:
            self.aliases = ['ipython2console', 'ipythonconsole']

        in1_regex = options.get('in1_regex', self.in1_regex)
        in2_regex = options.get('in2_regex', self.in2_regex)
        out_regex = options.get('out_regex', self.out_regex)

        # So that we can work with input and output prompts which have been
        # rstrip'd (possibly by editors) we also need rstrip'd variants. If
        # we do not do this, then such prompts will be tagged as 'output'.
        # The reason we can't just use the rstrip'd variants instead is that
        # we want any whitespace associated with the prompt to be inserted
        # with the token. This allows formatted code to be modified so as to
        # hide the appearance of prompts, with the whitespace included. One
        # example of this is in copybutton.js from the standard lib Python docs.
        in1_regex_rstrip = in1_regex.rstrip() + '\n'
        in2_regex_rstrip = in2_regex.rstrip() + '\n'
        out_regex_rstrip = out_regex.rstrip() + '\n'

        # Compile and save them all.
        attrs = ['in1_regex', 'in2_regex', 'out_regex',
                 'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
        for attr in attrs:
            self.__setattr__(attr, re.compile(locals()[attr]))

        Lexer.__init__(self, **options)

        if self.python3:
            pylexer = IPython3Lexer
            tblexer = IPythonTracebackLexer
        else:
            pylexer = IPythonLexer
            tblexer = IPythonTracebackLexer

        self.pylexer = pylexer(**options)
        self.tblexer = tblexer(**options)

        self.reset()

    def reset(self):
        self.mode = 'output'
        self.index = 0
        self.buffer = u''
        self.insertions = []

    def buffered_tokens(self):
        """
        Generator of unprocessed tokens after doing insertions and before
        changing to a new state.

        """
        if self.mode == 'output':
            tokens = [(0, Generic.Output, self.buffer)]
        elif self.mode == 'input':
            tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
        else: # traceback
            tokens = self.tblexer.get_tokens_unprocessed(self.buffer)

        for i, t, v in do_insertions(self.insertions, tokens):
            # All token indexes are relative to the buffer.
            yield self.index + i, t, v

        # Clear it all
        self.index += len(self.buffer)
        self.buffer = u''
        self.insertions = []

    def get_mci(self, line):
        """
        Parses the line and returns a 3-tuple: (mode, code, insertion).

        `mode` is the next mode (or state) of the lexer, and is always equal
        to 'input', 'output', or 'tb'.

        `code` is a portion of the line that should be added to the buffer
        corresponding to the next mode and eventually lexed by another lexer.
        For example, `code` could be Python code if `mode` were 'input'.

        `insertion` is a 3-tuple (index, token, text) representing an
        unprocessed "token" that will be inserted into the stream of tokens
        that are created from the buffer once we change modes. This is usually
        the input or output prompt.

        In general, the next mode depends on current mode and on the contents
        of `line`.

        """
        # To reduce the number of regex match checks, we have multiple
        # 'if' blocks instead of 'if-elif' blocks.

        # Check for possible end of input
        in2_match = self.in2_regex.match(line)
        in2_match_rstrip = self.in2_regex_rstrip.match(line)
        if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
           in2_match_rstrip:
            end_input = True
        else:
            end_input = False
        if end_input and self.mode != 'tb':
            # Only look for an end of input when not in tb mode.
            # An ellipsis could appear within the traceback.
            mode = 'output'
            code = u''
            insertion = (0, Generic.Prompt, line)
            return mode, code, insertion

        # Check for output prompt
        out_match = self.out_regex.match(line)
        out_match_rstrip = self.out_regex_rstrip.match(line)
        if out_match or out_match_rstrip:
            mode = 'output'
            if out_match:
                idx = out_match.end()
            else:
                idx = out_match_rstrip.end()
            code = line[idx:]
            # Use the 'heading' token for output. We cannot use Generic.Error
            # since it would conflict with exceptions.
            insertion = (0, Generic.Heading, line[:idx])
            return mode, code, insertion


        # Check for input or continuation prompt (non stripped version)
        in1_match = self.in1_regex.match(line)
        if in1_match or (in2_match and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match:
                idx = in1_match.end()
            else: # in2_match
                idx = in2_match.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for input or continuation prompt (stripped version)
        in1_match_rstrip = self.in1_regex_rstrip.match(line)
        if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match_rstrip:
                idx = in1_match_rstrip.end()
            else: # in2_match
                idx = in2_match_rstrip.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for traceback
        if self.ipytb_start.match(line):
            mode = 'tb'
            code = line
            insertion = None
            return mode, code, insertion

        # All other stuff...
        if self.mode in ('input', 'output'):
            # We assume all other text is output. Multiline input that
            # does not use the continuation marker cannot be detected.
            # For example, the 3 in the following is clearly output:
            #
            #    In [1]: print 3
            #    3
            #
            # But the following second line is part of the input:
            #
            #    In [2]: while True:
            #        print True
            #
            # In both cases, the 2nd line will be 'output'.
            #
            mode = 'output'
        else:
            mode = 'tb'

        code = line
        insertion = None

        return mode, code, insertion

    def get_tokens_unprocessed(self, text):
        self.reset()
        for match in line_re.finditer(text):
            line = match.group()
            mode, code, insertion = self.get_mci(line)

            if mode != self.mode:
                # Yield buffered tokens before transitioning to new mode.
                for token in self.buffered_tokens():
                    yield token
                self.mode = mode

            if insertion:
                self.insertions.append((len(self.buffer), [insertion]))
            self.buffer += code

        for token in self.buffered_tokens():
            yield token

class IPyLexer(Lexer):
    """
    Primary lexer for all IPython-like code.

    This is a simple helper lexer. If the first line of the text begins with
    "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
    lexer. If not, then the entire text is parsed with an IPython lexer.

    The goal is to reduce the number of lexers that are registered
    with Pygments.

    """
    name = 'IPy session'
    aliases = ['ipy']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipy3']
        else:
            self.aliases = ['ipy2', 'ipy']

        Lexer.__init__(self, **options)

        self.IPythonLexer = IPythonLexer(**options)
        self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

    def get_tokens_unprocessed(self, text):
        # Search for the input prompt anywhere...this allows code blocks to
        # begin with comments as well.
        if re.match(r'.*(In \[[0-9]+\]:)', text.strip(), re.DOTALL):
            lex = self.IPythonConsoleLexer
        else:
            lex = self.IPythonLexer
        for token in lex.get_tokens_unprocessed(text):
            yield token

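To check the delegation from the reviewer's side, one can inspect the raw token stream rather than rendered output. A small sketch, again assuming the module path IPython/lib/lexers.py; the exact token types emitted depend on the Pygments version:

# Sketch: confirm that a %%latex cell body yields TeX tokens, not plain Text.
from IPython.lib.lexers import IPython3Lexer

code = "%%latex\n\\begin{equation}x^2\\end{equation}\n"
for token_type, value in IPython3Lexer().get_tokens(code):
    print(token_type, repr(value))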