##// END OF EJS Templates
Fix failing docs build....
Matthias Bussonnier -
Show More
@@ -1,526 +1,540 b''
1 # -*- coding: utf-8 -*-
1 # -*- coding: utf-8 -*-
2 """
2 """
3 Defines a variety of Pygments lexers for highlighting IPython code.
3 Defines a variety of Pygments lexers for highlighting IPython code.
4
4
5 This includes:
5 This includes:
6
6
7 IPythonLexer, IPython3Lexer
7 IPythonLexer, IPython3Lexer
8 Lexers for pure IPython (python + magic/shell commands)
8 Lexers for pure IPython (python + magic/shell commands)
9
9
10 IPythonPartialTracebackLexer, IPythonTracebackLexer
10 IPythonPartialTracebackLexer, IPythonTracebackLexer
11 Supports 2.x and 3.x via keyword `python3`. The partial traceback
11 Supports 2.x and 3.x via keyword `python3`. The partial traceback
12 lexer reads everything but the Python code appearing in a traceback.
12 lexer reads everything but the Python code appearing in a traceback.
13 The full lexer combines the partial lexer with an IPython lexer.
13 The full lexer combines the partial lexer with an IPython lexer.
14
14
15 IPythonConsoleLexer
15 IPythonConsoleLexer
16 A lexer for IPython console sessions, with support for tracebacks.
16 A lexer for IPython console sessions, with support for tracebacks.
17
17
18 IPyLexer
18 IPyLexer
19 A friendly lexer which examines the first line of text and from it,
19 A friendly lexer which examines the first line of text and from it,
20 decides whether to use an IPython lexer or an IPython console lexer.
20 decides whether to use an IPython lexer or an IPython console lexer.
21 This is probably the only lexer that needs to be explicitly added
21 This is probably the only lexer that needs to be explicitly added
22 to Pygments.
22 to Pygments.
23
23
24 """
24 """
25 #-----------------------------------------------------------------------------
25 #-----------------------------------------------------------------------------
26 # Copyright (c) 2013, the IPython Development Team.
26 # Copyright (c) 2013, the IPython Development Team.
27 #
27 #
28 # Distributed under the terms of the Modified BSD License.
28 # Distributed under the terms of the Modified BSD License.
29 #
29 #
30 # The full license is in the file COPYING.txt, distributed with this software.
30 # The full license is in the file COPYING.txt, distributed with this software.
31 #-----------------------------------------------------------------------------
31 #-----------------------------------------------------------------------------
32
32
33 # Standard library
33 # Standard library
34 import re
34 import re
35
35
36 # Third party
36 # Third party
37 from pygments.lexers import (
37 from pygments.lexers import (
38 BashLexer, HtmlLexer, JavascriptLexer, RubyLexer, PerlLexer, PythonLexer,
38 BashLexer, HtmlLexer, JavascriptLexer, RubyLexer, PerlLexer, PythonLexer,
39 Python3Lexer, TexLexer)
39 Python3Lexer, TexLexer)
40 from pygments.lexer import (
40 from pygments.lexer import (
41 Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
41 Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
42 )
42 )
43 from pygments.token import (
43 from pygments.token import (
44 Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
44 Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
45 )
45 )
46 from pygments.util import get_bool_opt
46 from pygments.util import get_bool_opt
47
47
48 # Local
48 # Local
49
49
# Matches a single line of text, including its trailing newline.
line_re = re.compile('.*?\n')

# Public API of this module: the lexer factory plus the concrete lexer
# classes registered with Pygments.
__all__ = ['build_ipy_lexer', 'IPython3Lexer', 'IPythonLexer',
           'IPythonPartialTracebackLexer', 'IPythonTracebackLexer',
           'IPythonConsoleLexer', 'IPyLexer']
55
55
56
56
def build_ipy_lexer(python3):
    """Builds IPython lexers depending on the value of `python3`.

    The lexer inherits from an appropriate Python lexer and then adds
    information about IPython specific keywords (i.e. magic commands,
    shell commands, etc.)

    Parameters
    ----------
    python3 : bool
        If `True`, then build an IPython lexer from a Python 3 lexer.

    Returns
    -------
    type
        A newly created lexer class deriving from `Python3Lexer` or
        `PythonLexer`, with IPython-specific rules prepended to 'root'.

    """
    # It would be nice to have a single IPython lexer class which takes
    # a boolean `python3`. But since there are two Python lexer classes,
    # we will also have two IPython lexer classes.
    if python3:
        PyLexer = Python3Lexer
        name = 'IPython3'
        aliases = ['ipython3']
        doc = """IPython3 Lexer"""
    else:
        PyLexer = PythonLexer
        name = 'IPython'
        aliases = ['ipython2', 'ipython']
        doc = """IPython Lexer"""

    # NOTE: RegexLexer tries rules in order, and a pattern such as
    # (%%python)([^\n]*\n) also matches a %%python2 or %%python3 cell (the
    # trailing characters are absorbed by `[^\n]*`).  Longer magic names
    # must therefore be listed BEFORE magics that are a prefix of them
    # (%%python3/%%python2 before %%python, %%timeit before %%time),
    # otherwise the longer rules are unreachable.
    ipython_tokens = [
        (r'(?s)(\s*)(%%capture)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%debug)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?is)(\s*)(%%html)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(HtmlLexer))),
        (r'(?s)(\s*)(%%javascript)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(JavascriptLexer))),
        (r'(?s)(\s*)(%%js)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(JavascriptLexer))),
        (r'(?s)(\s*)(%%latex)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(TexLexer))),
        (r'(?s)(\s*)(%%perl)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PerlLexer))),
        (r'(?s)(\s*)(%%prun)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%pypy)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%python3)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(Python3Lexer))),
        (r'(?s)(\s*)(%%python2)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PythonLexer))),
        (r'(?s)(\s*)(%%python)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%ruby)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(RubyLexer))),
        (r'(?s)(\s*)(%%timeit)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%time)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%writefile)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        (r'(?s)(\s*)(%%file)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(PyLexer))),
        # Any other cell magic: highlight the name, leave the body as Text.
        (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
        # %%! runs the cell through the system shell.
        (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))),
        # Help requests on magics: %magic? / %%magic??
        (r"(%%?)(\w+)(\?\??)$", bygroups(Operator, Keyword, Operator)),
        (r"\b(\?\??)(\s*)$", bygroups(Operator, Text)),
        # Line magics that delegate to the shell.
        (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
                                                using(BashLexer), Text)),
        # Any other line magic.
        (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
        # Shell escapes: !!cmd and !cmd (but not != comparisons).
        (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
        (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
        # Introspection: leading or trailing ? / ??
        (r'^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)', bygroups(Text, Operator, Text)),
        (r'(\s*%{0,2}[\w\.\*]*)(\?\??)(\s*)$', bygroups(Text, Operator, Text)),
    ]

    # Prepend the IPython rules so they take precedence over the plain
    # Python rules inherited from `PyLexer`.
    tokens = PyLexer.tokens.copy()
    tokens['root'] = ipython_tokens + tokens['root']

    attrs = {'name': name, 'aliases': aliases, 'filenames': [],
             '__doc__': doc, 'tokens': tokens}

    return type(name, (PyLexer,), attrs)
122
122
123
123
# Concrete lexer classes: Python 3 and Python 2 flavoured IPython code.
IPython3Lexer = build_ipy_lexer(python3=True)
IPythonLexer = build_ipy_lexer(python3=False)
126
126
127
127
class IPythonPartialTracebackLexer(RegexLexer):
    """
    Partial lexer for IPython tracebacks.

    Handles all the non-python output.

    """
    name = 'IPython Partial Traceback'

    tokens = {
        'root': [
            # Tracebacks for syntax errors have a different style.
            # For both types of tracebacks, we mark the first line with
            # Generic.Traceback.  For syntax errors, we mark the filename
            # as we mark the filenames for non-syntax tracebacks.
            #
            # These two regexps define how IPythonConsoleLexer finds a
            # traceback.
            #
            ## Non-syntax traceback
            (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
            ## Syntax traceback
            (r'^(  File)(.*)(, line )(\d+\n)',
             bygroups(Generic.Traceback, Name.Namespace,
                      Generic.Traceback, Literal.Number.Integer)),

            # (Exception Identifier)(Whitespace)(Traceback Message)
            (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
             bygroups(Name.Exception, Generic.Whitespace, Text)),
            # (Module/Filename)(Text)(Callee)(Function Signature)
            # Better options for callee and function signature?
            (r'(.*)( in )(.*)(\(.*\)\n)',
             bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
            # Regular line: (Whitespace)(Line Number)(Python Code)
            (r'(\s*?)(\d+)(.*?\n)',
             bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
            # Emphasized line: (Arrow)(Line Number)(Python Code)
            # Using Exception token so arrow color matches the Exception.
            (r'(-*>?\s?)(\d+)(.*?\n)',
             bygroups(Name.Exception, Literal.Number.Integer, Other)),
            # (Exception Identifier)(Message)
            (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
             bygroups(Name.Exception, Text)),
            # Tag everything else as Other, will be handled later.
            (r'.*\n', Other),
        ],
    }
175
175
176
176
class IPythonTracebackLexer(DelegatingLexer):
    """
    IPython traceback lexer.

    For doctests, the tracebacks can be snipped as much as desired with the
    exception to the lines that designate a traceback. For non-syntax error
    tracebacks, this is the line of hyphens. For syntax error tracebacks,
    this is the line which lists the File and line number.

    """
    # This is a DelegatingLexer: the text is first run through the partial
    # IPython traceback lexer, and anything that lexer tags as "Other"
    # (i.e. actual Python code) is then handed to the root lexer, which is
    # an IPython lexer chosen according to the `python3` option.
    name = 'IPython Traceback'
    aliases = ['ipythontb']

    def __init__(self, **options):
        """
        A subclass of `DelegatingLexer` which delegates to the appropriate to either IPyLexer,
        IPythonPartialTracebackLexer.
        """
        # An explicit __init__ docstring is required here: the one inherited
        # from DelegatingLexer references sections of the Pygments docs that
        # do not exist when building IPython's own documentation, which makes
        # the docs build fail.
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3tb']
            root_lexer = IPython3Lexer
        else:
            self.aliases = ['ipython2tb', 'ipythontb']
            root_lexer = IPythonLexer

        DelegatingLexer.__init__(self, root_lexer,
                                 IPythonPartialTracebackLexer, **options)
210
217
class IPythonConsoleLexer(Lexer):
    """
    An IPython console lexer for IPython code-blocks and doctests, such as:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: a = 'foo'

            In [2]: a
            Out[2]: 'foo'

            In [3]: print(a)
            foo


    Support is also provided for IPython exceptions:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: raise Exception
            Traceback (most recent call last):
            ...
            Exception

    """
    name = 'IPython console session'
    aliases = ['ipythonconsole']
    mimetypes = ['text/x-ipython-console']

    # The regexps used to determine what is input and what is output.
    # The default prompts for IPython are:
    #
    #     in           = 'In [#]: '
    #     continuation = '   .D.: '
    #     template     = 'Out[#]: '
    #
    # Where '#' is the 'prompt number' or 'execution count' and 'D'
    # D is a number of dots matching the width of the execution count
    #
    in1_regex = r'In \[[0-9]+\]: '
    in2_regex = r'   \.\.+\.: '
    out_regex = r'Out\[[0-9]+\]: '

    #: The regex to determine when a traceback starts.
    # Mirrors the first two rules of IPythonPartialTracebackLexer.
    ipytb_start = re.compile(r'^(\^C)?(-+\n)|^(  File)(.*)(, line )(\d+\n)')
260
267
261 def __init__(self, **options):
268 def __init__(self, **options):
262 """Initialize the IPython console lexer.
269 """Initialize the IPython console lexer.
263
270
264 Parameters
271 Parameters
265 ----------
272 ----------
266 python3 : bool
273 python3 : bool
267 If `True`, then the console inputs are parsed using a Python 3
274 If `True`, then the console inputs are parsed using a Python 3
268 lexer. Otherwise, they are parsed using a Python 2 lexer.
275 lexer. Otherwise, they are parsed using a Python 2 lexer.
269 in1_regex : RegexObject
276 in1_regex : RegexObject
270 The compiled regular expression used to detect the start
277 The compiled regular expression used to detect the start
271 of inputs. Although the IPython configuration setting may have a
278 of inputs. Although the IPython configuration setting may have a
272 trailing whitespace, do not include it in the regex. If `None`,
279 trailing whitespace, do not include it in the regex. If `None`,
273 then the default input prompt is assumed.
280 then the default input prompt is assumed.
274 in2_regex : RegexObject
281 in2_regex : RegexObject
275 The compiled regular expression used to detect the continuation
282 The compiled regular expression used to detect the continuation
276 of inputs. Although the IPython configuration setting may have a
283 of inputs. Although the IPython configuration setting may have a
277 trailing whitespace, do not include it in the regex. If `None`,
284 trailing whitespace, do not include it in the regex. If `None`,
278 then the default input prompt is assumed.
285 then the default input prompt is assumed.
279 out_regex : RegexObject
286 out_regex : RegexObject
280 The compiled regular expression used to detect outputs. If `None`,
287 The compiled regular expression used to detect outputs. If `None`,
281 then the default output prompt is assumed.
288 then the default output prompt is assumed.
282
289
283 """
290 """
284 self.python3 = get_bool_opt(options, 'python3', False)
291 self.python3 = get_bool_opt(options, 'python3', False)
285 if self.python3:
292 if self.python3:
286 self.aliases = ['ipython3console']
293 self.aliases = ['ipython3console']
287 else:
294 else:
288 self.aliases = ['ipython2console', 'ipythonconsole']
295 self.aliases = ['ipython2console', 'ipythonconsole']
289
296
290 in1_regex = options.get('in1_regex', self.in1_regex)
297 in1_regex = options.get('in1_regex', self.in1_regex)
291 in2_regex = options.get('in2_regex', self.in2_regex)
298 in2_regex = options.get('in2_regex', self.in2_regex)
292 out_regex = options.get('out_regex', self.out_regex)
299 out_regex = options.get('out_regex', self.out_regex)
293
300
294 # So that we can work with input and output prompts which have been
301 # So that we can work with input and output prompts which have been
295 # rstrip'd (possibly by editors) we also need rstrip'd variants. If
302 # rstrip'd (possibly by editors) we also need rstrip'd variants. If
296 # we do not do this, then such prompts will be tagged as 'output'.
303 # we do not do this, then such prompts will be tagged as 'output'.
297 # The reason can't just use the rstrip'd variants instead is because
304 # The reason can't just use the rstrip'd variants instead is because
298 # we want any whitespace associated with the prompt to be inserted
305 # we want any whitespace associated with the prompt to be inserted
299 # with the token. This allows formatted code to be modified so as hide
306 # with the token. This allows formatted code to be modified so as hide
300 # the appearance of prompts, with the whitespace included. One example
307 # the appearance of prompts, with the whitespace included. One example
301 # use of this is in copybutton.js from the standard lib Python docs.
308 # use of this is in copybutton.js from the standard lib Python docs.
302 in1_regex_rstrip = in1_regex.rstrip() + '\n'
309 in1_regex_rstrip = in1_regex.rstrip() + '\n'
303 in2_regex_rstrip = in2_regex.rstrip() + '\n'
310 in2_regex_rstrip = in2_regex.rstrip() + '\n'
304 out_regex_rstrip = out_regex.rstrip() + '\n'
311 out_regex_rstrip = out_regex.rstrip() + '\n'
305
312
306 # Compile and save them all.
313 # Compile and save them all.
307 attrs = ['in1_regex', 'in2_regex', 'out_regex',
314 attrs = ['in1_regex', 'in2_regex', 'out_regex',
308 'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
315 'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
309 for attr in attrs:
316 for attr in attrs:
310 self.__setattr__(attr, re.compile(locals()[attr]))
317 self.__setattr__(attr, re.compile(locals()[attr]))
311
318
312 Lexer.__init__(self, **options)
319 Lexer.__init__(self, **options)
313
320
314 if self.python3:
321 if self.python3:
315 pylexer = IPython3Lexer
322 pylexer = IPython3Lexer
316 tblexer = IPythonTracebackLexer
323 tblexer = IPythonTracebackLexer
317 else:
324 else:
318 pylexer = IPythonLexer
325 pylexer = IPythonLexer
319 tblexer = IPythonTracebackLexer
326 tblexer = IPythonTracebackLexer
320
327
321 self.pylexer = pylexer(**options)
328 self.pylexer = pylexer(**options)
322 self.tblexer = tblexer(**options)
329 self.tblexer = tblexer(**options)
323
330
324 self.reset()
331 self.reset()
325
332
    def reset(self):
        # Return the lexer to its initial state: we start out assuming
        # 'output' mode, with an empty buffer, no pending prompt
        # insertions, and the absolute document offset at zero.
        self.mode = 'output'
        self.index = 0
        self.buffer = u''
        self.insertions = []
331
338
332 def buffered_tokens(self):
339 def buffered_tokens(self):
333 """
340 """
334 Generator of unprocessed tokens after doing insertions and before
341 Generator of unprocessed tokens after doing insertions and before
335 changing to a new state.
342 changing to a new state.
336
343
337 """
344 """
338 if self.mode == 'output':
345 if self.mode == 'output':
339 tokens = [(0, Generic.Output, self.buffer)]
346 tokens = [(0, Generic.Output, self.buffer)]
340 elif self.mode == 'input':
347 elif self.mode == 'input':
341 tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
348 tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
342 else: # traceback
349 else: # traceback
343 tokens = self.tblexer.get_tokens_unprocessed(self.buffer)
350 tokens = self.tblexer.get_tokens_unprocessed(self.buffer)
344
351
345 for i, t, v in do_insertions(self.insertions, tokens):
352 for i, t, v in do_insertions(self.insertions, tokens):
346 # All token indexes are relative to the buffer.
353 # All token indexes are relative to the buffer.
347 yield self.index + i, t, v
354 yield self.index + i, t, v
348
355
349 # Clear it all
356 # Clear it all
350 self.index += len(self.buffer)
357 self.index += len(self.buffer)
351 self.buffer = u''
358 self.buffer = u''
352 self.insertions = []
359 self.insertions = []
353
360
    def get_mci(self, line):
        """
        Parses the line and returns a 3-tuple: (mode, code, insertion).

        `mode` is the next mode (or state) of the lexer, and is always equal
        to 'input', 'output', or 'tb'.

        `code` is a portion of the line that should be added to the buffer
        corresponding to the next mode and eventually lexed by another lexer.
        For example, `code` could be Python code if `mode` were 'input'.

        `insertion` is a 3-tuple (index, token, text) representing an
        unprocessed "token" that will be inserted into the stream of tokens
        that are created from the buffer once we change modes. This is usually
        the input or output prompt.

        In general, the next mode depends on current mode and on the contents
        of `line`.

        Parameters
        ----------
        line : str
            One line of console text, including its trailing newline.

        """
        # To reduce the number of regex match checks, we have multiple
        # 'if' blocks instead of 'if-elif' blocks.

        # Check for possible end of input: a bare continuation prompt
        # (possibly rstrip'd) with nothing after it means the input block
        # has finished.
        in2_match = self.in2_regex.match(line)
        in2_match_rstrip = self.in2_regex_rstrip.match(line)
        if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
           in2_match_rstrip:
            end_input = True
        else:
            end_input = False
        if end_input and self.mode != 'tb':
            # Only look for an end of input when not in tb mode.
            # An ellipsis could appear within the traceback.
            mode = 'output'
            code = u''
            insertion = (0, Generic.Prompt, line)
            return mode, code, insertion

        # Check for output prompt
        out_match = self.out_regex.match(line)
        out_match_rstrip = self.out_regex_rstrip.match(line)
        if out_match or out_match_rstrip:
            mode = 'output'
            if out_match:
                idx = out_match.end()
            else:
                idx = out_match_rstrip.end()
            code = line[idx:]
            # Use the 'heading' token for output. We cannot use Generic.Error
            # since it would conflict with exceptions.
            insertion = (0, Generic.Heading, line[:idx])
            return mode, code, insertion

        # Check for input or continuation prompt (non stripped version)
        in1_match = self.in1_regex.match(line)
        if in1_match or (in2_match and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match:
                idx = in1_match.end()
            else:  # in2_match
                idx = in2_match.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for input or continuation prompt (stripped version)
        in1_match_rstrip = self.in1_regex_rstrip.match(line)
        if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            mode = 'input'
            if in1_match_rstrip:
                idx = in1_match_rstrip.end()
            else:  # in2_match
                idx = in2_match_rstrip.end()
            code = line[idx:]
            insertion = (0, Generic.Prompt, line[:idx])
            return mode, code, insertion

        # Check for traceback
        if self.ipytb_start.match(line):
            mode = 'tb'
            code = line
            insertion = None
            return mode, code, insertion

        # All other stuff...
        if self.mode in ('input', 'output'):
            # We assume all other text is output. Multiline input that
            # does not use the continuation marker cannot be detected.
            # For example, the 3 in the following is clearly output:
            #
            #    In [1]: print 3
            #    3
            #
            # But the following second line is part of the input:
            #
            #    In [2]: while True:
            #        ...:     print True
            #
            # In both cases, the 2nd line will be 'output'.
            #
            mode = 'output'
        else:
            mode = 'tb'

        code = line
        insertion = None

        return mode, code, insertion
476 return mode, code, insertion
470
477
471 def get_tokens_unprocessed(self, text):
478 def get_tokens_unprocessed(self, text):
472 self.reset()
479 self.reset()
473 for match in line_re.finditer(text):
480 for match in line_re.finditer(text):
474 line = match.group()
481 line = match.group()
475 mode, code, insertion = self.get_mci(line)
482 mode, code, insertion = self.get_mci(line)
476
483
477 if mode != self.mode:
484 if mode != self.mode:
478 # Yield buffered tokens before transitioning to new mode.
485 # Yield buffered tokens before transitioning to new mode.
479 for token in self.buffered_tokens():
486 for token in self.buffered_tokens():
480 yield token
487 yield token
481 self.mode = mode
488 self.mode = mode
482
489
483 if insertion:
490 if insertion:
484 self.insertions.append((len(self.buffer), [insertion]))
491 self.insertions.append((len(self.buffer), [insertion]))
485 self.buffer += code
492 self.buffer += code
486
493
487 for token in self.buffered_tokens():
494 for token in self.buffered_tokens():
488 yield token
495 yield token
489
496
class IPyLexer(Lexer):
    r"""
    Primary lexer for all IPython-like code.

    This is a simple helper lexer. If the first line of the text begins with
    "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
    lexer. If not, then the entire text is parsed with an IPython lexer.

    The goal is to reduce the number of lexers that are registered
    with Pygments.

    """
    name = 'IPy session'
    aliases = ['ipy']

    def __init__(self, **options):
        """
        Create a new IPyLexer instance which dispatches to either an
        IPythonConsoleLexer (if ``In`` prompts are present) or an
        IPythonLexer (if ``In`` prompts are not present).
        """
        # This explicit __init__ docstring is necessary for the docs build
        # not to fail, due to the parent class's docs referencing a section
        # in the Pygments documentation.
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipy3']
        else:
            # Default (non-python3) mode also answers to the plain alias.
            self.aliases = ['ipy2', 'ipy']

        Lexer.__init__(self, **options)

        # Sub-lexers that actually do the work; chosen per-text below.
        self.IPythonLexer = IPythonLexer(**options)
        self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

    def get_tokens_unprocessed(self, text):
        # Search for the input prompt anywhere...this allows code blocks to
        # begin with comments as well.
        if re.match(r'.*(In \[[0-9]+\]:)', text.strip(), re.DOTALL):
            lex = self.IPythonConsoleLexer
        else:
            lex = self.IPythonLexer
        for token in lex.get_tokens_unprocessed(text):
            yield token
526
540
General Comments 0
You need to be logged in to leave comments. Login now