Fix %%perl highlighting
Matthias Geier
@@ -1,532 +1,532 @@
# -*- coding: utf-8 -*-
"""
Defines a variety of Pygments lexers for highlighting IPython code.

This includes:

    IPythonLexer, IPython3Lexer
        Lexers for pure IPython (python + magic/shell commands)

    IPythonPartialTracebackLexer, IPythonTracebackLexer
        Supports 2.x and 3.x via keyword `python3`. The partial traceback
        lexer reads everything but the Python code appearing in a traceback.
        The full lexer combines the partial lexer with an IPython lexer.

    IPythonConsoleLexer
        A lexer for IPython console sessions, with support for tracebacks.

    IPyLexer
        A friendly lexer which examines the first line of text and, from it,
        decides whether to use an IPython lexer or an IPython console lexer.
        This is probably the only lexer that needs to be explicitly added
        to Pygments.

"""
#-----------------------------------------------------------------------------
# Copyright (c) 2013, the IPython Development Team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file COPYING.txt, distributed with this software.
#-----------------------------------------------------------------------------

# Standard library
import re

# Third party
from pygments.lexers import (
    BashLexer, HtmlLexer, JavascriptLexer, RubyLexer, PerlLexer, PythonLexer,
    Python3Lexer, TexLexer)
from pygments.lexer import (
    Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
)
from pygments.token import (
    Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
)
from pygments.util import get_bool_opt

# Local

line_re = re.compile('.*?\n')

__all__ = ['build_ipy_lexer', 'IPython3Lexer', 'IPythonLexer',
           'IPythonPartialTracebackLexer', 'IPythonTracebackLexer',
           'IPythonConsoleLexer', 'IPyLexer']


def build_ipy_lexer(python3):
    """Builds IPython lexers depending on the value of `python3`.

    The lexer inherits from an appropriate Python lexer and then adds
    information about IPython specific keywords (i.e. magic commands,
    shell commands, etc.)

    Parameters
    ----------
    python3 : bool
        If `True`, then build an IPython lexer from a Python 3 lexer.

    """
    # It would be nice to have a single IPython lexer class which takes
    # a boolean `python3`. But since there are two Python lexer classes,
    # we will also have two IPython lexer classes.
    if python3:
        PyLexer = Python3Lexer
        name = 'IPython3'
        aliases = ['ipython3']
        doc = """IPython3 Lexer"""
    else:
        PyLexer = PythonLexer
        name = 'IPython'
        aliases = ['ipython2', 'ipython']
        doc = """IPython Lexer"""

    ipython_tokens = [
        (r'(?s)(\s*)(%%capture)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%debug)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?is)(\s*)(%%html)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(HtmlLexer))),
        (r'(?s)(\s*)(%%javascript)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(JavascriptLexer))),
        (r'(?s)(\s*)(%%js)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(JavascriptLexer))),
        (r'(?s)(\s*)(%%latex)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(TexLexer))),
-        (r'(?s)(\s*)(%%pypy)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PerlLexer))),
+        (r'(?s)(\s*)(%%perl)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PerlLexer))),
        (r'(?s)(\s*)(%%prun)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%pypy)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%python)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%python2)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PythonLexer))),
        (r'(?s)(\s*)(%%python3)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(Python3Lexer))),
        (r'(?s)(\s*)(%%ruby)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(RubyLexer))),
        (r'(?s)(\s*)(%%time)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%timeit)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%writefile)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%file)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
        (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))),
        (r"(%%?)(\w+)(\?\??)$", bygroups(Operator, Keyword, Operator)),
        (r"\b(\?\??)(\s*)$", bygroups(Operator, Text)),
        (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
                                                using(BashLexer), Text)),
        (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
        (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
        (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
        (r'^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)', bygroups(Text, Operator, Text)),
        (r'(\s*%{0,2}[\w\.\*]*)(\?\??)(\s*)$', bygroups(Text, Operator, Text)),
    ]
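
    # Illustrative example (not part of the upstream rule table): with the
    # %%perl entry fixed above, a cell such as
    #
    #     %%perl
    #     my @squares = map { $_ * $_ } (1..5);
    #     print "@squares\n";
    #
    # now has its body delegated to PerlLexer instead of falling through to
    # the generic (%%)(\w+) rule.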

    tokens = PyLexer.tokens.copy()
    tokens['root'] = ipython_tokens + tokens['root']

    attrs = {'name': name, 'aliases': aliases, 'filenames': [],
             '__doc__': doc, 'tokens': tokens}

    return type(name, (PyLexer,), attrs)


IPython3Lexer = build_ipy_lexer(python3=True)
IPythonLexer = build_ipy_lexer(python3=False)
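
# A quick sanity check of the generated classes (a hedged sketch, not part of
# the original module; uses only the standard pygments Lexer.get_tokens API):
#
#     from pygments.token import Operator
#     toks = list(IPython3Lexer().get_tokens('%%perl\nprint "hi";\n'))
#     assert (Operator, '%%perl') in toks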


class IPythonPartialTracebackLexer(RegexLexer):
    """
    Partial lexer for IPython tracebacks.

    Handles all the non-python output. This works for both Python 2.x and 3.x.

    """
    name = 'IPython Partial Traceback'

    tokens = {
        'root': [
            # Tracebacks for syntax errors have a different style.
            # For both types of tracebacks, we mark the first line with
            # Generic.Traceback. For syntax errors, we mark the filename
            # as we mark the filenames for non-syntax tracebacks.
            #
            # These two regexps define how IPythonConsoleLexer finds a
            # traceback.
            #
            ## Non-syntax traceback
            (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
            ## Syntax traceback
            (r'^(  File)(.*)(, line )(\d+\n)',
             bygroups(Generic.Traceback, Name.Namespace,
                      Generic.Traceback, Literal.Number.Integer)),

            # (Exception Identifier)(Whitespace)(Traceback Message)
            (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
             bygroups(Name.Exception, Generic.Whitespace, Text)),
            # (Module/Filename)(Text)(Callee)(Function Signature)
            # Better options for callee and function signature?
            (r'(.*)( in )(.*)(\(.*\)\n)',
             bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
            # Regular line: (Whitespace)(Line Number)(Python Code)
            (r'(\s*?)(\d+)(.*?\n)',
             bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
            # Emphasized line: (Arrow)(Line Number)(Python Code)
            # Using Exception token so arrow color matches the Exception.
            (r'(-*>?\s?)(\d+)(.*?\n)',
             bygroups(Name.Exception, Literal.Number.Integer, Other)),
            # (Exception Identifier)(Message)
            (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
             bygroups(Name.Exception, Text)),
            # Tag everything else as Other, will be handled later.
            (r'.*\n', Other),
        ],
    }


class IPythonTracebackLexer(DelegatingLexer):
    """
    IPython traceback lexer.

    For doctests, the tracebacks can be snipped as much as desired, with the
    exception of the lines that designate a traceback. For non-syntax error
    tracebacks, this is the line of hyphens. For syntax error tracebacks,
    this is the line which lists the File and line number.

    """
    # The lexer inherits from DelegatingLexer. The "root" lexer is an
    # appropriate IPython lexer, which depends on the value of the boolean
    # `python3`. First, we parse with the partial IPython traceback lexer.
    # Then, any code marked with the "Other" token is delegated to the root
    # lexer.
    #
    name = 'IPython Traceback'
    aliases = ['ipythontb']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3tb']
        else:
            self.aliases = ['ipython2tb', 'ipythontb']

        if self.python3:
            IPyLexer = IPython3Lexer
        else:
            IPyLexer = IPythonLexer

        DelegatingLexer.__init__(self, IPyLexer,
                                 IPythonPartialTracebackLexer, **options)

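# For example (an illustrative sketch): lexing a snipped traceback with
# IPythonTracebackLexer first applies the partial lexer, which tags the line
# of hyphens, the arrow, and the exception names; everything it marked as
# `Other` is then re-lexed as IPython code by the root lexer:
#
#     tb = ('---------------------------------------------------------\n'
#           'ZeroDivisionError        Traceback (most recent call last)\n'
#           '----> 1 1/0\n'
#           'ZeroDivisionError: integer division or modulo by zero\n')
#     for token in IPythonTracebackLexer().get_tokens(tb):
#         print(token)
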
class IPythonConsoleLexer(Lexer):
    """
    An IPython console lexer for IPython code-blocks and doctests, such as:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: a = 'foo'

            In [2]: a
            Out[2]: 'foo'

            In [3]: print a
            foo

            In [4]: 1 / 0


    Support is also provided for IPython exceptions:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: raise Exception

            ---------------------------------------------------------------------------
            Exception                                 Traceback (most recent call last)
            <ipython-input-1-fca2ab0ca76b> in <module>
            ----> 1 raise Exception

            Exception:

    """
    name = 'IPython console session'
    aliases = ['ipythonconsole']
    mimetypes = ['text/x-ipython-console']

    # The regexps used to determine what is input and what is output.
    # The default prompts for IPython are:
    #
    #     in           = 'In [#]: '
    #     continuation = '   .D.: '
    #     template     = 'Out[#]: '
    #
    # Where '#' is the 'prompt number' or 'execution count' and 'D'
    # is a number of dots matching the width of the execution count.
    #
    in1_regex = r'In \[[0-9]+\]: '
    in2_regex = r'   \.\.+\.: '
    out_regex = r'Out\[[0-9]+\]: '

    #: The regex to determine when a traceback starts.
    ipytb_start = re.compile(r'^(\^C)?(-+\n)|^(  File)(.*)(, line )(\d+\n)')
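
    # For instance (illustrative), each of the following first lines matches
    # `ipytb_start` and switches the console lexer into traceback mode:
    #
    #     ---------------------------------------------------------------------------
    #     ^C---------------------------------------------------------------------------
    #       File "<ipython-input-1-...>", line 1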

    def __init__(self, **options):
        """Initialize the IPython console lexer.

        Parameters
        ----------
        python3 : bool
            If `True`, then the console inputs are parsed using a Python 3
            lexer. Otherwise, they are parsed using a Python 2 lexer.
        in1_regex : RegexObject
            The compiled regular expression used to detect the start
            of inputs. Although the IPython configuration setting may have
            trailing whitespace, do not include it in the regex. If `None`,
            then the default input prompt is assumed.
        in2_regex : RegexObject
            The compiled regular expression used to detect the continuation
            of inputs. Although the IPython configuration setting may have
            trailing whitespace, do not include it in the regex. If `None`,
            then the default input prompt is assumed.
        out_regex : RegexObject
            The compiled regular expression used to detect outputs. If `None`,
            then the default output prompt is assumed.

        """
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3console']
        else:
            self.aliases = ['ipython2console', 'ipythonconsole']

        in1_regex = options.get('in1_regex', self.in1_regex)
        in2_regex = options.get('in2_regex', self.in2_regex)
        out_regex = options.get('out_regex', self.out_regex)

        # So that we can work with input and output prompts which have been
        # rstrip'd (possibly by editors) we also need rstrip'd variants. If
        # we do not do this, then such prompts will be tagged as 'output'.
        # The reason we can't just use the rstrip'd variants instead is that
        # we want any whitespace associated with the prompt to be inserted
        # with the token. This allows formatted code to be modified so as to
        # hide the appearance of prompts, with the whitespace included. One
        # example use of this is in copybutton.js from the standard lib
        # Python docs.
        in1_regex_rstrip = in1_regex.rstrip() + '\n'
        in2_regex_rstrip = in2_regex.rstrip() + '\n'
        out_regex_rstrip = out_regex.rstrip() + '\n'

        # Compile and save them all.
        attrs = ['in1_regex', 'in2_regex', 'out_regex',
                 'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
        for attr in attrs:
            self.__setattr__(attr, re.compile(locals()[attr]))

        Lexer.__init__(self, **options)

        if self.python3:
            pylexer = IPython3Lexer
            tblexer = IPythonTracebackLexer
        else:
            pylexer = IPythonLexer
            tblexer = IPythonTracebackLexer

        self.pylexer = pylexer(**options)
        self.tblexer = tblexer(**options)

        self.reset()

    def reset(self):
        self.mode = 'output'
        self.index = 0
        self.buffer = u''
        self.insertions = []

    def buffered_tokens(self):
        """
        Generator of unprocessed tokens after doing insertions and before
        changing to a new state.

        """
        if self.mode == 'output':
            tokens = [(0, Generic.Output, self.buffer)]
        elif self.mode == 'input':
            tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
        else: # traceback
            tokens = self.tblexer.get_tokens_unprocessed(self.buffer)

        for i, t, v in do_insertions(self.insertions, tokens):
            # All token indexes are relative to the buffer.
            yield self.index + i, t, v

        # Clear it all
        self.index += len(self.buffer)
        self.buffer = u''
        self.insertions = []

    def get_mci(self, line):
        """
        Parses the line and returns a 3-tuple: (mode, code, insertion).

        `mode` is the next mode (or state) of the lexer, and is always equal
        to 'input', 'output', or 'tb'.

        `code` is a portion of the line that should be added to the buffer
        corresponding to the next mode and eventually lexed by another lexer.
        For example, `code` could be Python code if `mode` were 'input'.

        `insertion` is a 3-tuple (index, token, text) representing an
        unprocessed "token" that will be inserted into the stream of tokens
        that are created from the buffer once we change modes. This is usually
        the input or output prompt.

        In general, the next mode depends on the current mode and on the
        contents of `line`.

        """
        # To reduce the number of regex match checks, we have multiple
        # 'if' blocks instead of 'if-elif' blocks.

        # Check for possible end of input
        in2_match = self.in2_regex.match(line)
        in2_match_rstrip = self.in2_regex_rstrip.match(line)
        if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
           in2_match_rstrip:
            end_input = True
        else:
            end_input = False
        if end_input and self.mode != 'tb':
            # Only look for an end of input when not in tb mode.
            # An ellipsis could appear within the traceback.
            mode = 'output'
            code = u''
            insertion = (0, Generic.Prompt, line)
            return mode, code, insertion

        # Check for output prompt
        out_match = self.out_regex.match(line)
        out_match_rstrip = self.out_regex_rstrip.match(line)
        if out_match or out_match_rstrip:
            mode = 'output'
            if out_match:
                idx = out_match.end()
            else:
                idx = out_match_rstrip.end()
            code = line[idx:]
            # Use the 'heading' token for output. We cannot use Generic.Error
            # since it would conflict with exceptions.
            insertion = (0, Generic.Heading, line[:idx])
            return mode, code, insertion


        # Check for input or continuation prompt (non stripped version)
        in1_match = self.in1_regex.match(line)
        if in1_match or (in2_match and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match:
                idx = in1_match.end()
            else: # in2_match
                idx = in2_match.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for input or continuation prompt (stripped version)
        in1_match_rstrip = self.in1_regex_rstrip.match(line)
        if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match_rstrip:
                idx = in1_match_rstrip.end()
            else: # in2_match
                idx = in2_match_rstrip.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for traceback
        if self.ipytb_start.match(line):
            mode = 'tb'
            code = line
            insertion = None
            return mode, code, insertion

        # All other stuff...
        if self.mode in ('input', 'output'):
            # We assume all other text is output. Multiline input that
            # does not use the continuation marker cannot be detected.
            # For example, the 3 in the following is clearly output:
            #
            #    In [1]: print 3
            #    3
            #
            # But the following second line is part of the input:
            #
            #    In [2]: while True:
            #        print True
            #
            # In both cases, the 2nd line will be 'output'.
            #
            mode = 'output'
        else:
            mode = 'tb'

        code = line
        insertion = None

        return mode, code, insertion
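
    # A worked example (illustrative): starting from the initial 'output'
    # mode, feeding these lines to get_mci() one at a time yields:
    #
    #     'In [1]: 1/0\n' -> ('input', '1/0\n', (0, Generic.Prompt, 'In [1]: '))
    #     '--------...\n' -> ('tb', line, None)   # matches ipytb_start
    #     'ZeroDivisionError: ...\n' -> ('tb', line, None)   # mode stays 'tb'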

    def get_tokens_unprocessed(self, text):
        self.reset()
        for match in line_re.finditer(text):
            line = match.group()
            mode, code, insertion = self.get_mci(line)

            if mode != self.mode:
                # Yield buffered tokens before transitioning to new mode.
                for token in self.buffered_tokens():
                    yield token
                self.mode = mode

            if insertion:
                self.insertions.append((len(self.buffer), [insertion]))
            self.buffer += code

        for token in self.buffered_tokens():
            yield token

class IPyLexer(Lexer):
    """
    Primary lexer for all IPython-like code.

    This is a simple helper lexer. If the first line of the text begins with
    "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
    lexer. If not, then the entire text is parsed with an IPython lexer.

    The goal is to reduce the number of lexers that are registered
    with Pygments.

    """
    name = 'IPy session'
    aliases = ['ipy']

    def __init__(self, **options):
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipy3']
        else:
            self.aliases = ['ipy2', 'ipy']

        Lexer.__init__(self, **options)

        self.IPythonLexer = IPythonLexer(**options)
        self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

    def get_tokens_unprocessed(self, text):
        # Search for the input prompt anywhere...this allows code blocks to
        # begin with comments as well.
        if re.match(r'.*(In \[[0-9]+\]:)', text.strip(), re.DOTALL):
            lex = self.IPythonConsoleLexer
        else:
            lex = self.IPythonLexer
        for token in lex.get_tokens_unprocessed(text):
            yield token

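# Example usage (a hedged sketch; assumes Pygments is installed and that this
# module is importable, e.g. as IPython.lib.lexers):
#
#     from pygments import highlight
#     from pygments.formatters import HtmlFormatter
#
#     code = "In [1]: a = 'foo'\n\nIn [2]: a\nOut[2]: 'foo'\n"
#     print(highlight(code, IPyLexer(), HtmlFormatter()))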