Update IPython Pygments lexers.
chebee7i
@@ -1,46 +1,473 @@
1 """A custom pygments lexer for IPython code cells.
1 # -*- coding: utf-8 -*-
2 """
3 Defines a variety of Pygments lexers for highlighting IPython code.
4
5 This includes:
6
7 IPythonLexer
8 IPython3Lexer
9 Lexers for pure IPython (python + magic/shell commands)
10
11 IPythonPartialTracebackLexer
12 IPythonTracebackLexer
13 Supports 2.x and 3.x via keyword `python3`. The partial traceback
14 lexer reads everything but the Python code appearing in a traceback.
15 The full lexer combines the partial lexer with an IPython lexer.
16
17 IPythonConsoleLexer
18 A lexer for IPython console sessions, with support for tracebacks.
19
20 IPyLexer
21 A friendly lexer which examines the first line of text and from it,
22 decides whether to use an IPython lexer or an IPython console lexer.
23 This is probably the only lexer that needs to be explicitly added
24 to Pygments.
2
25
3 Informs The pygments highlighting library of the quirks of IPython's superset
4 of Python -- magic commands, !shell commands, etc.
5 """
26 """
6 #-----------------------------------------------------------------------------
27
7 # Copyright (c) 2013, the IPython Development Team.
28 # Standard library
8 #
29 import re
9 # Distributed under the terms of the Modified BSD License.
30
10 #
31 # Third party
11 # The full license is in the file COPYING.txt, distributed with this software.
32 from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
12 #-----------------------------------------------------------------------------
33 from pygments.lexer import (
13
34 Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
14 #-----------------------------------------------------------------------------
35 )
15 # Imports
36 from pygments.token import (
16 #-----------------------------------------------------------------------------
37 Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
17
38 )
18 # Third-party imports
39 from pygments.util import get_bool_opt
19 from pygments.lexers import PythonLexer, BashLexer
40
20 from pygments.lexer import bygroups, using
41
21 from pygments.token import Keyword, Operator, Text
42
22
43 line_re = re.compile('.*?\n')
23 #-----------------------------------------------------------------------------
44
24 # Class declarations
45 ipython_tokens = [
25 #-----------------------------------------------------------------------------
46 (r'(\%+)(\w+)\s+(\.*)(\n)', bygroups(Operator, Keyword,
26
47 using(BashLexer), Text)),
27 class IPythonLexer(PythonLexer):
48 (r'(\%+)(\w+)\b', bygroups(Operator, Keyword)),
28 """
49 (r'^(!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
29 Pygments Lexer for use with IPython code. Inherits from
50 ]
30 PythonLexer and adds information about IPython specific
51
31 keywords (i.e. magic commands, shell commands, etc.)
52 def build_ipy_lexer(python3):
32 """
53 """Builds IPython lexers depending on the value of `python3`.
33
54
34 #Basic properties
55 The lexer inherits from an appropriate Python lexer and then adds
35 name = 'IPython'
56 information about IPython specific keywords (i.e. magic commands,
36 aliases = ['ip', 'ipython']
57 shell commands, etc.)
37 filenames = ['*.ipy']
58
38
59 Parameters
39 #Highlighting information
60 ----------
40 tokens = PythonLexer.tokens.copy()
61 python3 : bool
41 tokens['root'] = [
62 If `True`, then build an IPython lexer from a Python 3 lexer.
42 (r'(\%+)(\w+)\s+(\.*)(\n)', bygroups(Operator, Keyword,
63
43 using(BashLexer), Text)),
64 """
44 (r'(\%+)(\w+)\b', bygroups(Operator, Keyword)),
65 # It would be nice to have a single IPython lexer class which takes
45 (r'^(!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
66 # a boolean `python3`. But since there are two Python lexer classes,
46 ] + tokens['root']
67 # we will also have two IPython lexer classes.
+    if python3:
+        PyLexer = Python3Lexer
+        clsname = 'IPython3Lexer'
+        name = 'IPython3'
+        aliases = ['ipython3']
+        doc = """IPython3 Lexer"""
+    else:
+        PyLexer = PythonLexer
+        clsname = 'IPythonLexer'
+        name = 'IPython'
+        aliases = ['ipython']
+        doc = """IPython Lexer"""
+
+    tokens = PyLexer.tokens.copy()
+    tokens['root'] = ipython_tokens + tokens['root']
+
+    attrs = {'name': name, 'aliases': aliases,
+             '__doc__': doc, 'tokens': tokens}
+
+    return type(name, (PyLexer,), attrs)
+
+
+IPython3Lexer = build_ipy_lexer(python3=True)
+IPythonLexer = build_ipy_lexer(python3=False)
+
+
+class IPythonPartialTracebackLexer(RegexLexer):
+    """
+    Partial lexer for IPython tracebacks.
+
+    Handles all the non-python output. This works for both Python 2.x and 3.x.
+
+    """
+    name = 'IPython Partial Traceback'
+
+    tokens = {
+        'root': [
+            # Tracebacks for syntax errors have a different style.
+            # For both types of tracebacks, we mark the first line with
+            # Generic.Traceback.  For syntax errors, we mark the filename
+            # as we mark the filenames for non-syntax tracebacks.
+            #
+            # These two regexps define how IPythonConsoleLexer finds a
+            # traceback.
+            #
+            ## Non-syntax traceback
+            (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
+            ## Syntax traceback
+            (r'^(  File)(.*)(, line )(\d+\n)',
+             bygroups(Generic.Traceback, Name.Namespace,
+                      Generic.Traceback, Literal.Number.Integer)),
+
+            # (Exception Identifier)(Whitespace)(Traceback Message)
+            (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
+             bygroups(Name.Exception, Generic.Whitespace, Text)),
+            # (Module/Filename)(Text)(Callee)(Function Signature)
+            # Better options for callee and function signature?
+            (r'(.*)( in )(.*)(\(.*\)\n)',
+             bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
+            # Regular line: (Whitespace)(Line Number)(Python Code)
+            (r'(\s*?)(\d+)(.*?\n)',
+             bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
+            # Emphasized line: (Arrow)(Line Number)(Python Code)
+            # Using Exception token so arrow color matches the Exception.
+            (r'(-*>?\s?)(\d+)(.*?\n)',
+             bygroups(Name.Exception, Literal.Number.Integer, Other)),
+            # (Exception Identifier)(Message)
+            (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
+             bygroups(Name.Exception, Text)),
+            # Tag everything else as Other, will be handled later.
+            (r'.*\n', Other),
+        ],
+    }
+
+
+class IPythonTracebackLexer(DelegatingLexer):
+    """
+    IPython traceback lexer.
+
+    For doctests, the tracebacks can be snipped as much as desired with the
+    exception of the lines that designate a traceback. For non-syntax error
+    tracebacks, this is the line of hyphens. For syntax error tracebacks,
+    this is the line which lists the File and line number.
+
+    """
+    # The lexer inherits from DelegatingLexer.  The "root" lexer is an
+    # appropriate IPython lexer, which depends on the value of the boolean
+    # `python3`.  First, we parse with the partial IPython traceback lexer.
+    # Then, any code marked with the "Other" token is delegated to the root
+    # lexer.
+    #
+    name = 'IPython Traceback'
+    aliases = ['ipythontb']
+
+    def __init__(self, **options):
+        self.python3 = get_bool_opt(options, 'python3', False)
+
+        if self.python3:
+            IPyLexer = IPython3Lexer
+        else:
+            IPyLexer = IPythonLexer
+
+        DelegatingLexer.__init__(self, IPyLexer,
+                                 IPythonPartialTracebackLexer, **options)
+
+
+class IPythonConsoleLexer(Lexer):
+    """
+    An IPython console lexer for IPython code-blocks and doctests, such as:
+
+    .. sourcecode:: ipythoncon
+
+        In [1]: a = 'foo'
+
+        In [2]: a
+        Out[2]: 'foo'
+
+        In [3]: print a
+        foo
+
+        In [4]: 1 / 0
+
+    Support is also provided for IPython exceptions.
+
+    .. code-block:: ipythoncon
+
+        In [1]: raise Exception
+        ---------------------------------------------------------------------------
+        Exception                                 Traceback (most recent call last)
+        <ipython-input-1-fca2ab0ca76b> in <module>()
+        ----> 1 raise Exception
+
+        Exception:
+
+    """
+    name = 'IPython console session'
+    aliases = ['ipythoncon']
+    mimetypes = ['text/x-ipython-console']
+
+    # The regexps used to determine what is input and what is output.  The
+    # input regex should be consistent with and also be the combination of
+    # the values of the `in_template` and `in2_template`.  For example, the
+    # default prompts are:
+    #
+    #    c.PromptManager.in_template = 'In [\#]: '
+    #    c.PromptManager.in2_template = '   .\D.: '
+    #    c.PromptManager.out_template = 'Out[\#]: '
+    #
+    # Note, we do not include the trailing whitespace in the regex since
+    # we want to allow blank prompts (and editors often remove trailing
+    # whitespace).
+    #
+    in1_regex = r'In \[[0-9]+\]: '
+    in2_regex = r'   \.\.+\.: '
+    out_regex = r'Out\[[0-9]+\]: '
+
+    #: The regex to determine when a traceback starts.
+    ipytb_start = re.compile(r'^(\^C)?(-+\n)|^(  File)(.*)(, line )(\d+\n)')
+
+    def __init__(self, **options):
+        """Initialize the IPython console lexer.
+
+        Parameters
+        ----------
+        python3 : bool
+            If `True`, then the console inputs are parsed using a Python 3
+            lexer. Otherwise, they are parsed using a Python 2 lexer.
+        in1_regex : str
+            The regular expression used to detect the start of inputs
+            (it is compiled in the constructor). Although the IPython
+            configuration setting may have a trailing whitespace, do not
+            include it in the regex. If not given, then the default input
+            prompt is assumed.
+        in2_regex : str
+            The regular expression used to detect the continuation of
+            inputs. Although the IPython configuration setting may have a
+            trailing whitespace, do not include it in the regex. If not
+            given, then the default input prompt is assumed.
+        out_regex : str
+            The regular expression used to detect outputs. If not given,
+            then the default output prompt is assumed.
+
+        """
+        self.python3 = get_bool_opt(options, 'python3', False)
+
+        in1_regex = options.get('in1_regex', self.in1_regex)
+        in2_regex = options.get('in2_regex', self.in2_regex)
+        out_regex = options.get('out_regex', self.out_regex)
+
+        # So that we can work with input and output prompts which have been
+        # rstrip'd (possibly by editors) we also need rstrip'd variants. If
+        # we do not do this, then such prompts will be tagged as 'output'.
+        # The reason we can't just use the rstrip'd variants instead is that
+        # we want any whitespace associated with the prompt to be inserted
+        # with the token. This allows formatted code to be modified so as to
+        # hide the appearance of prompts. For example, see copybutton.js.
+        in1_regex_rstrip = in1_regex.rstrip() + '\n'
+        in2_regex_rstrip = in2_regex.rstrip() + '\n'
+        out_regex_rstrip = out_regex.rstrip() + '\n'
+
+        # Compile and save them all.
+        attrs = ['in1_regex', 'in2_regex', 'out_regex',
+                 'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
+        for attr in attrs:
+            self.__setattr__(attr, re.compile(locals()[attr]))
+
+        Lexer.__init__(self, **options)
+
+        if self.python3:
+            pylexer = IPython3Lexer
+            tblexer = IPythonTracebackLexer
+        else:
+            pylexer = IPythonLexer
+            tblexer = IPythonTracebackLexer
+
+        self.pylexer = pylexer(**options)
+        self.tblexer = tblexer(**options)
+
+        self.reset()
+
+    def reset(self):
+        self.mode = 'output'
+        self.index = 0
+        self.buffer = u''
+        self.insertions = []
+
+    def buffered_tokens(self):
+        """
+        Generator of unprocessed tokens after doing insertions and before
+        changing to a new state.
+
+        """
+        if self.mode == 'output':
+            tokens = [(0, Generic.Output, self.buffer)]
+        elif self.mode == 'input':
+            tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
+        else: # traceback
+            tokens = self.tblexer.get_tokens_unprocessed(self.buffer)
+
+        for i, t, v in do_insertions(self.insertions, tokens):
+            # All token indexes are relative to the buffer.
+            yield self.index + i, t, v
+
+        # Clear it all
+        self.index += len(self.buffer)
+        self.buffer = u''
+        self.insertions = []
+
+    def get_modecode(self, line):
+        """
+        Returns the next mode and code to be added to the next mode's buffer.
+
+        The next mode depends on the current mode and the contents of line.
+
+        """
+        # To reduce the number of regex match checks, we have multiple
+        # 'if' blocks instead of 'if-elif' blocks.
+
+        ### Check for possible end of input
+        ###
+        in2_match = self.in2_regex.match(line)
+        in2_match_rstrip = self.in2_regex_rstrip.match(line)
+        if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
+           in2_match_rstrip:
+            end_input = True
+        else:
+            end_input = False
+        if end_input and self.mode != 'tb':
+            # Only look for an end of input when not in tb mode.
+            # An ellipsis could appear within the traceback.
+            mode = 'output'
+            code = u''
+            insertion = (0, Generic.Prompt, line)
+            return mode, code, insertion
+
+        ### Check for output prompt
+        ###
+        out_match = self.out_regex.match(line)
+        out_match_rstrip = self.out_regex_rstrip.match(line)
+        if out_match or out_match_rstrip:
+            mode = 'output'
+            if out_match:
+                idx = out_match.end()
+            else:
+                idx = out_match_rstrip.end()
+            code = line[idx:]
+            # Use the 'heading' token for output.  We cannot use Generic.Error
+            # since it would conflict with exceptions.
+            insertion = (0, Generic.Heading, line[:idx])
+            return mode, code, insertion
+
+
+        ### Check for input or continuation prompt (non-stripped version)
+        ###
+        in1_match = self.in1_regex.match(line)
+        if in1_match or (in2_match and self.mode != 'tb'):
+            # New input or when not in tb, continued input.
+            # We do not check for continued input when in tb since it is
+            # allowable to replace a long stack with an ellipsis.
+            mode = 'input'
+            if in1_match:
+                idx = in1_match.end()
+            else: # in2_match
+                idx = in2_match.end()
+            code = line[idx:]
+            insertion = (0, Generic.Prompt, line[:idx])
+            return mode, code, insertion
+
+        ### Check for input or continuation prompt (stripped version)
+        ###
+        in1_match_rstrip = self.in1_regex_rstrip.match(line)
+        if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
+            # New input or when not in tb, continued input.
+            # We do not check for continued input when in tb since it is
+            # allowable to replace a long stack with an ellipsis.
+            mode = 'input'
+            if in1_match_rstrip:
+                idx = in1_match_rstrip.end()
+            else: # in2_match_rstrip
+                idx = in2_match_rstrip.end()
+            code = line[idx:]
+            insertion = (0, Generic.Prompt, line[:idx])
+            return mode, code, insertion
+
+        ### Check for traceback
+        ###
+        if self.ipytb_start.match(line):
+            mode = 'tb'
+            code = line
+            insertion = None
+            return mode, code, insertion
+
+        ### All other stuff...
+        ###
+        if self.mode in ('input', 'output'):
+            # We assume all other text is output. Multiline input that
+            # does not use the continuation marker cannot be detected.
+            # For example, the 3 in the following is clearly output:
+            #
+            #    In [1]: print 3
+            #    3
+            #
+            # But the following second line is part of the input:
+            #
+            #    In [2]: while True:
+            #        print True
+            #
+            # In both cases, the 2nd line will be 'output'.
+            #
+            mode = 'output'
+        else:
+            mode = 'tb'
+
+        code = line
+        insertion = None
+
+        return mode, code, insertion
+
+    def get_tokens_unprocessed(self, text):
+        self.reset()
+        for match in line_re.finditer(text):
+            line = match.group()
+            mode, code, insertion = self.get_modecode(line)
+
+            if mode != self.mode:
+                # Yield buffered tokens before transitioning to new mode.
+                for token in self.buffered_tokens():
+                    yield token
+                self.mode = mode
+
+            if insertion:
+                self.insertions.append((len(self.buffer), [insertion]))
+            self.buffer += code
+        else:
+            # The for-else clause: once every line has been consumed,
+            # flush whatever is still sitting in the buffer.
+            for token in self.buffered_tokens():
+                yield token
+
+class IPyLexer(Lexer):
+    """
+    Primary lexer for all IPython-like code.
+
+    This is a simple helper lexer.  If the first line of the text begins with
+    "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
+    lexer. If not, then the entire text is parsed with an IPython lexer.
+
+    The goal is to reduce the number of lexers that are registered
+    with Pygments.
+
+    """
+    name = 'IPy session'
+    aliases = ['ipy']
+
+    def __init__(self, **options):
+        self.python3 = get_bool_opt(options, 'python3', False)
+        Lexer.__init__(self, **options)
+
+        self.IPythonLexer = IPythonLexer(**options)
+        self.IPythonConsoleLexer = IPythonConsoleLexer(**options)
+
+    def get_tokens_unprocessed(self, text):
+        if re.match(r'(In \[[0-9]+\]:)', text.strip()):
+            lex = self.IPythonConsoleLexer
+        else:
+            lex = self.IPythonLexer
+        for token in lex.get_tokens_unprocessed(text):
+            yield token
+
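
The new `IPyLexer` at the bottom of the diff is, as its docstring says, probably the only lexer that needs to be explicitly registered with Pygments; it dispatches on the first line of the text. A minimal sketch of driving it directly (the `lexers` import path and the sample strings are illustrative, not part of the commit):

    from pygments import highlight
    from pygments.formatters import HtmlFormatter

    from lexers import IPyLexer  # hypothetical import path for this module

    # The first line matches 'In [N]:', so IPyLexer hands the whole text
    # to IPythonConsoleLexer.
    session = "In [1]: a = 'foo'\n\nIn [2]: a\nOut[2]: 'foo'\n"
    print(highlight(session, IPyLexer(), HtmlFormatter()))

    # No console prompt on the first line, so the plain IPythonLexer is used.
    script = "%timeit x = 1\n!ls\n"
    print(highlight(script, IPyLexer(), HtmlFormatter()))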
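
What separates the generated `IPythonLexer`/`IPython3Lexer` from a stock `PythonLexer` is just the three `ipython_tokens` rules prepended to the `'root'` state: `%magic` names come out as `Operator` plus `Keyword`, and `!` shell lines are delegated to `BashLexer`. A rough sketch of the resulting token stream (expected values shown as comments; the exact stream may vary by Pygments version):

    from lexers import IPythonLexer  # hypothetical import path

    lexer = IPythonLexer()

    # '%timeit' hits the (r'(\%+)(\w+)\b', ...) rule: '%' -> Operator and
    # 'timeit' -> Keyword; the rest of the line is ordinary Python.
    print(list(lexer.get_tokens('%timeit x = 1\n'))[:2])
    # -> [(Token.Operator, '%'), (Token.Keyword, 'timeit')]

    # A leading '!' hands the remainder of the line to BashLexer.
    print(list(lexer.get_tokens('!ls -l\n'))[0])
    # -> (Token.Operator, '!')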
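
`IPythonConsoleLexer` reads its prompt patterns out of `**options` as plain strings, derives the rstrip'd variants, and compiles all six attributes in `__init__`, so sessions recorded with non-default prompts can still be lexed. A sketch with hypothetical custom prompts (the patterns below are illustrative, not IPython defaults):

    from lexers import IPythonConsoleLexer  # hypothetical import path

    # Pattern strings, not compiled regexes; note the trailing space that
    # mirrors the real prompt (it is rstrip'd internally for the variants).
    lexer = IPythonConsoleLexer(python3=True,
                                in1_regex=r'py3 \[[0-9]+\]> ',
                                in2_regex=r'\.\.\.> ',
                                out_regex=r'out \[[0-9]+\]> ')

    text = "py3 [1]> print('hi')\nhi\n"
    for index, token, value in lexer.get_tokens_unprocessed(text):
        print(index, token, repr(value))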
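
Finally, `IPythonTracebackLexer` is a `DelegatingLexer`: the partial traceback lexer claims the frame decorations (the line of hyphens, arrows, exception names) and tags the embedded source lines as `Other`, which the delegating machinery then re-lexes with the chosen IPython lexer. A sketch using the traceback from the console-lexer docstring above:

    from pygments import highlight
    from pygments.formatters import TerminalFormatter

    from lexers import IPythonTracebackLexer  # hypothetical import path

    tb = ("---------------------------------------------------------------------------\n"
          "Exception                                 Traceback (most recent call last)\n"
          "<ipython-input-1-fca2ab0ca76b> in <module>()\n"
          "----> 1 raise Exception\n"
          "\n"
          "Exception:\n")

    # python3=True makes IPython3Lexer the delegated 'root' lexer.
    print(highlight(tb, IPythonTracebackLexer(python3=True), TerminalFormatter()))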