@@ -1,506 +1,507 @@
 # -*- coding: utf-8 -*-
 """
 Defines a variety of Pygments lexers for highlighting IPython code.

 This includes:

     IPythonLexer, IPython3Lexer
         Lexers for pure IPython (python + magic/shell commands)

     IPythonPartialTracebackLexer, IPythonTracebackLexer
         Supports 2.x and 3.x via keyword `python3`. The partial traceback
         lexer reads everything but the Python code appearing in a traceback.
         The full lexer combines the partial lexer with an IPython lexer.

     IPythonConsoleLexer
         A lexer for IPython console sessions, with support for tracebacks.

     IPyLexer
         A friendly lexer which examines the first line of text and from it,
         decides whether to use an IPython lexer or an IPython console lexer.
         This is probably the only lexer that needs to be explicitly added
         to Pygments.

 """
 #-----------------------------------------------------------------------------
 # Copyright (c) 2013, the IPython Development Team.
 #
 # Distributed under the terms of the Modified BSD License.
 #
 # The full license is in the file COPYING.txt, distributed with this software.
 #-----------------------------------------------------------------------------

 # Standard library
 import re

 # Third party
 from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
 from pygments.lexer import (
     Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
 )
 from pygments.token import (
     Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
 )
 from pygments.util import get_bool_opt

 # Local
 from IPython.testing.skipdoctest import skip_doctest

 line_re = re.compile('.*?\n')

 ipython_tokens = [
+    (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
     (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
                                             using(BashLexer), Text)),
     (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
     (r'^(!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
     (r'^(.+)(=)(\s*)(!)(.+)(\n)', bygroups(
         # With the limited syntax allowed on the l.h.s. of a shell capture,
         # we don't need to differentiate between Python 2 and 3.
         using(Python3Lexer), Operator, Text, Operator, using(BashLexer), Text)),
 ]

 def build_ipy_lexer(python3):
     """Builds IPython lexers depending on the value of `python3`.

     The lexer inherits from an appropriate Python lexer and then adds
     information about IPython specific keywords (i.e. magic commands,
     shell commands, etc.)

     Parameters
     ----------
     python3 : bool
         If `True`, then build an IPython lexer from a Python 3 lexer.

     """
     # It would be nice to have a single IPython lexer class which takes
     # a boolean `python3`. But since there are two Python lexer classes,
     # we will also have two IPython lexer classes.
     if python3:
         PyLexer = Python3Lexer
         clsname = 'IPython3Lexer'
         name = 'IPython3'
         aliases = ['ipython3']
         doc = """IPython3 Lexer"""
     else:
         PyLexer = PythonLexer
         clsname = 'IPythonLexer'
         name = 'IPython'
         aliases = ['ipython2', 'ipython']
         doc = """IPython Lexer"""

     tokens = PyLexer.tokens.copy()
     tokens['root'] = ipython_tokens + tokens['root']

     attrs = {'name': name, 'aliases': aliases,
              '__doc__': doc, 'tokens': tokens}

     return type(name, (PyLexer,), attrs)


 IPython3Lexer = build_ipy_lexer(python3=True)
 IPythonLexer = build_ipy_lexer(python3=False)


 class IPythonPartialTracebackLexer(RegexLexer):
     """
     Partial lexer for IPython tracebacks.

     Handles all the non-python output. This works for both Python 2.x and 3.x.

     """
     name = 'IPython Partial Traceback'

     tokens = {
         'root': [
             # Tracebacks for syntax errors have a different style.
             # For both types of tracebacks, we mark the first line with
             # Generic.Traceback. For syntax errors, we mark the filename
             # as we mark the filenames for non-syntax tracebacks.
             #
             # These two regexps define how IPythonConsoleLexer finds a
             # traceback.
             #
             ## Non-syntax traceback
             (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
             ## Syntax traceback
             (r'^(  File)(.*)(, line )(\d+\n)',
              bygroups(Generic.Traceback, Name.Namespace,
                       Generic.Traceback, Literal.Number.Integer)),

             # (Exception Identifier)(Whitespace)(Traceback Message)
             (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
              bygroups(Name.Exception, Generic.Whitespace, Text)),
             # (Module/Filename)(Text)(Callee)(Function Signature)
             # Better options for callee and function signature?
             (r'(.*)( in )(.*)(\(.*\)\n)',
              bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
             # Regular line: (Whitespace)(Line Number)(Python Code)
             (r'(\s*?)(\d+)(.*?\n)',
              bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
             # Emphasized line: (Arrow)(Line Number)(Python Code)
             # Using Exception token so arrow color matches the Exception.
             (r'(-*>?\s?)(\d+)(.*?\n)',
              bygroups(Name.Exception, Literal.Number.Integer, Other)),
             # (Exception Identifier)(Message)
             (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
              bygroups(Name.Exception, Text)),
             # Tag everything else as Other, will be handled later.
             (r'.*\n', Other),
         ],
     }


 class IPythonTracebackLexer(DelegatingLexer):
     """
     IPython traceback lexer.

     For doctests, the tracebacks can be snipped as much as desired with the
     exception of the lines that designate a traceback. For non-syntax error
     tracebacks, this is the line of hyphens. For syntax error tracebacks,
     this is the line which lists the File and line number.

     """
     # The lexer inherits from DelegatingLexer. The "root" lexer is an
     # appropriate IPython lexer, which depends on the value of the boolean
     # `python3`. First, we parse with the partial IPython traceback lexer.
     # Then, any code marked with the "Other" token is delegated to the root
     # lexer.
     #
     name = 'IPython Traceback'
     aliases = ['ipythontb']

     def __init__(self, **options):
         self.python3 = get_bool_opt(options, 'python3', False)
         if self.python3:
             self.aliases = ['ipython3tb']
         else:
             self.aliases = ['ipython2tb', 'ipythontb']

         if self.python3:
             IPyLexer = IPython3Lexer
         else:
             IPyLexer = IPythonLexer

         DelegatingLexer.__init__(self, IPyLexer,
                                  IPythonPartialTracebackLexer, **options)

 @skip_doctest
 class IPythonConsoleLexer(Lexer):
     """
     An IPython console lexer for IPython code-blocks and doctests, such as:

     .. code-block:: rst

         .. code-block:: ipythonconsole

             In [1]: a = 'foo'

             In [2]: a
             Out[2]: 'foo'

             In [3]: print a
             foo

             In [4]: 1 / 0


     Support is also provided for IPython exceptions:

     .. code-block:: rst

         .. code-block:: ipythonconsole

             In [1]: raise Exception

             ---------------------------------------------------------------------------
             Exception                                 Traceback (most recent call last)
             <ipython-input-1-fca2ab0ca76b> in <module>()
             ----> 1 raise Exception

             Exception:

     """
     name = 'IPython console session'
     aliases = ['ipythonconsole']
     mimetypes = ['text/x-ipython-console']

     # The regexps used to determine what is input and what is output.
     # The default prompts for IPython are:
     #
     #     c.PromptManager.in_template  = 'In [\#]: '
     #     c.PromptManager.in2_template = '   .\D.: '
     #     c.PromptManager.out_template = 'Out[\#]: '
     #
     in1_regex = r'In \[[0-9]+\]: '
     in2_regex = r'   \.\.+\.: '
     out_regex = r'Out\[[0-9]+\]: '

     #: The regex to determine when a traceback starts.
     ipytb_start = re.compile(r'^(\^C)?(-+\n)|^(  File)(.*)(, line )(\d+\n)')

     def __init__(self, **options):
         """Initialize the IPython console lexer.

         Parameters
         ----------
         python3 : bool
             If `True`, then the console inputs are parsed using a Python 3
             lexer. Otherwise, they are parsed using a Python 2 lexer.
         in1_regex : RegexObject
             The compiled regular expression used to detect the start
             of inputs. Although the IPython configuration setting may have
             trailing whitespace, do not include it in the regex. If `None`,
             then the default input prompt is assumed.
         in2_regex : RegexObject
             The compiled regular expression used to detect the continuation
             of inputs. Although the IPython configuration setting may have
             trailing whitespace, do not include it in the regex. If `None`,
             then the default input prompt is assumed.
         out_regex : RegexObject
             The compiled regular expression used to detect outputs. If `None`,
             then the default output prompt is assumed.

         """
         self.python3 = get_bool_opt(options, 'python3', False)
         if self.python3:
             self.aliases = ['ipython3console']
         else:
             self.aliases = ['ipython2console', 'ipythonconsole']

         in1_regex = options.get('in1_regex', self.in1_regex)
         in2_regex = options.get('in2_regex', self.in2_regex)
         out_regex = options.get('out_regex', self.out_regex)

         # So that we can work with input and output prompts which have been
         # rstrip'd (possibly by editors), we also need rstrip'd variants. If
         # we do not do this, then such prompts will be tagged as 'output'.
         # The reason we can't just use the rstrip'd variants instead is that
         # we want any whitespace associated with the prompt to be inserted
         # with the token. This allows formatted code to be modified so as to
         # hide the appearance of prompts, with the whitespace included. One
         # example use of this is in copybutton.js from the standard Python docs.
         in1_regex_rstrip = in1_regex.rstrip() + '\n'
         in2_regex_rstrip = in2_regex.rstrip() + '\n'
         out_regex_rstrip = out_regex.rstrip() + '\n'

         # Compile and save them all.
         attrs = ['in1_regex', 'in2_regex', 'out_regex',
                  'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
         for attr in attrs:
             self.__setattr__(attr, re.compile(locals()[attr]))

         Lexer.__init__(self, **options)

         if self.python3:
             pylexer = IPython3Lexer
             tblexer = IPythonTracebackLexer
         else:
             pylexer = IPythonLexer
             tblexer = IPythonTracebackLexer

         self.pylexer = pylexer(**options)
         self.tblexer = tblexer(**options)

         self.reset()

     def reset(self):
         self.mode = 'output'
         self.index = 0
         self.buffer = u''
         self.insertions = []

     def buffered_tokens(self):
         """
         Generator of unprocessed tokens after doing insertions and before
         changing to a new state.

         """
         if self.mode == 'output':
             tokens = [(0, Generic.Output, self.buffer)]
         elif self.mode == 'input':
             tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
         else: # traceback
             tokens = self.tblexer.get_tokens_unprocessed(self.buffer)

         for i, t, v in do_insertions(self.insertions, tokens):
             # All token indexes are relative to the buffer.
             yield self.index + i, t, v

         # Clear it all
         self.index += len(self.buffer)
         self.buffer = u''
         self.insertions = []

     def get_mci(self, line):
         """
         Parses the line and returns a 3-tuple: (mode, code, insertion).

         `mode` is the next mode (or state) of the lexer, and is always equal
         to 'input', 'output', or 'tb'.

         `code` is a portion of the line that should be added to the buffer
         corresponding to the next mode and eventually lexed by another lexer.
         For example, `code` could be Python code if `mode` were 'input'.

         `insertion` is a 3-tuple (index, token, text) representing an
         unprocessed "token" that will be inserted into the stream of tokens
         that are created from the buffer once we change modes. This is usually
         the input or output prompt.

         In general, the next mode depends on the current mode and on the
         contents of `line`.

         """
         # To reduce the number of regex match checks, we have multiple
         # 'if' blocks instead of 'if-elif' blocks.

         # Check for possible end of input
         in2_match = self.in2_regex.match(line)
         in2_match_rstrip = self.in2_regex_rstrip.match(line)
         if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
            in2_match_rstrip:
             end_input = True
         else:
             end_input = False
         if end_input and self.mode != 'tb':
             # Only look for an end of input when not in tb mode.
             # An ellipsis could appear within the traceback.
             mode = 'output'
             code = u''
             insertion = (0, Generic.Prompt, line)
             return mode, code, insertion

         # Check for output prompt
         out_match = self.out_regex.match(line)
         out_match_rstrip = self.out_regex_rstrip.match(line)
         if out_match or out_match_rstrip:
             mode = 'output'
             if out_match:
                 idx = out_match.end()
             else:
                 idx = out_match_rstrip.end()
             code = line[idx:]
             # Use the 'heading' token for output. We cannot use Generic.Error
             # since it would conflict with exceptions.
             insertion = (0, Generic.Heading, line[:idx])
             return mode, code, insertion


         # Check for input or continuation prompt (non-stripped version)
         in1_match = self.in1_regex.match(line)
         if in1_match or (in2_match and self.mode != 'tb'):
             # New input or when not in tb, continued input.
             # We do not check for continued input when in tb since it is
             # allowable to replace a long stack with an ellipsis.
             mode = 'input'
             if in1_match:
                 idx = in1_match.end()
             else: # in2_match
                 idx = in2_match.end()
             code = line[idx:]
             insertion = (0, Generic.Prompt, line[:idx])
             return mode, code, insertion

         # Check for input or continuation prompt (stripped version)
         in1_match_rstrip = self.in1_regex_rstrip.match(line)
         if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
             # New input or when not in tb, continued input.
             # We do not check for continued input when in tb since it is
             # allowable to replace a long stack with an ellipsis.
             mode = 'input'
             if in1_match_rstrip:
                 idx = in1_match_rstrip.end()
             else: # in2_match
                 idx = in2_match_rstrip.end()
             code = line[idx:]
             insertion = (0, Generic.Prompt, line[:idx])
             return mode, code, insertion

         # Check for traceback
         if self.ipytb_start.match(line):
             mode = 'tb'
             code = line
             insertion = None
             return mode, code, insertion

         # All other stuff...
         if self.mode in ('input', 'output'):
             # We assume all other text is output. Multiline input that
             # does not use the continuation marker cannot be detected.
             # For example, the 3 in the following is clearly output:
             #
             #    In [1]: print 3
             #    3
             #
             # But the following second line is part of the input:
             #
             #    In [2]: while True:
             #        print True
             #
             # In both cases, the 2nd line will be 'output'.
             #
             mode = 'output'
         else:
             mode = 'tb'

         code = line
         insertion = None

         return mode, code, insertion

     def get_tokens_unprocessed(self, text):
         self.reset()
         for match in line_re.finditer(text):
             line = match.group()
             mode, code, insertion = self.get_mci(line)

             if mode != self.mode:
                 # Yield buffered tokens before transitioning to new mode.
                 for token in self.buffered_tokens():
                     yield token
                 self.mode = mode

             if insertion:
                 self.insertions.append((len(self.buffer), [insertion]))
             self.buffer += code
         else:
             for token in self.buffered_tokens():
                 yield token

 class IPyLexer(Lexer):
     """
     Primary lexer for all IPython-like code.

     This is a simple helper lexer. If the first line of the text begins with
     "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
     lexer. If not, then the entire text is parsed with an IPython lexer.

     The goal is to reduce the number of lexers that are registered
     with Pygments.

     """
     name = 'IPy session'
     aliases = ['ipy']

     def __init__(self, **options):
         self.python3 = get_bool_opt(options, 'python3', False)
         if self.python3:
             self.aliases = ['ipy3']
         else:
             self.aliases = ['ipy2', 'ipy']

         Lexer.__init__(self, **options)

         self.IPythonLexer = IPythonLexer(**options)
         self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

     def get_tokens_unprocessed(self, text):
         # Search for the input prompt anywhere...this allows code blocks to
         # begin with comments as well.
         if re.match(r'.*(In \[[0-9]+\]:)', text.strip(), re.DOTALL):
             lex = self.IPythonConsoleLexer
         else:
             lex = self.IPythonLexer
         for token in lex.get_tokens_unprocessed(text):
             yield token

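The one functional change in the hunk above is the single added line: a new
rule at the top of `ipython_tokens` that teaches the IPython lexers to
recognize cell magics. Leading whitespace is emitted as Text, `%%` as
Operator, the magic name as Keyword, and, because of the `(?s)` flag, the
entire remainder of the cell as one Text token. Below is a minimal sketch of
the rule in action; it is illustrative only, and the import path is an
assumption about where this module lives in the IPython tree (adjust it to
your checkout):

    # Assumed import path for the module patched above.
    from IPython.nbconvert.utils.lexers import IPythonLexer

    lexer = IPythonLexer()
    for ttype, value in lexer.get_tokens(u'\t%%timeit\nx = range(10)\n'):
        print('%s %r' % (ttype, value))

    # Expected, per the new rule: the tab as Token.Text, '%%' as
    # Token.Operator, 'timeit' as Token.Keyword, and the rest of the cell
    # ('\nx = range(10)\n') as a single Token.Text chunk.
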
@@ -1,79 +1,88 @@
 """Test lexers module"""
 #-----------------------------------------------------------------------------
 # Copyright (C) 2014 The IPython Development Team
 #
 # Distributed under the terms of the BSD License. The full license is in
 # the file COPYING, distributed as part of this software.
 #-----------------------------------------------------------------------------

 #-----------------------------------------------------------------------------
 # Imports
 #-----------------------------------------------------------------------------
 from pygments.token import Token

 from IPython.nbconvert.tests.base import TestsBase
 from .. import lexers


 #-----------------------------------------------------------------------------
 # Classes and functions
 #-----------------------------------------------------------------------------
 class TestLexers(TestsBase):
     """Collection of lexers tests"""
     def setUp(self):
         self.lexer = lexers.IPythonLexer()

     def testIPythonLexer(self):
         fragment = '!echo $HOME\n'
         tokens = [
             (Token.Operator, '!'),
             (Token.Name.Builtin, 'echo'),
             (Token.Text, ' '),
             (Token.Name.Variable, '$HOME'),
             (Token.Text, '\n'),
         ]
         self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))

         fragment_2 = 'x = ' + fragment
         tokens_2 = [
             (Token.Name, 'x'),
             (Token.Text, ' '),
             (Token.Operator, '='),
             (Token.Text, ' '),
         ] + tokens
         self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

         fragment_2 = 'x, = ' + fragment
         tokens_2 = [
             (Token.Name, 'x'),
             (Token.Punctuation, ','),
             (Token.Text, ' '),
             (Token.Operator, '='),
             (Token.Text, ' '),
         ] + tokens
         self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

         fragment_2 = 'x, = %sx ' + fragment[1:]
         tokens_2 = [
             (Token.Name, 'x'),
             (Token.Punctuation, ','),
             (Token.Text, ' '),
             (Token.Operator, '='),
             (Token.Text, ' '),
             (Token.Operator, '%'),
             (Token.Keyword, 'sx'),
             (Token.Text, ' '),
         ] + tokens[1:]
         self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

         fragment_2 = 'f = %R function () {}\n'
         tokens_2 = [
             (Token.Name, 'f'),
             (Token.Text, ' '),
             (Token.Operator, '='),
             (Token.Text, ' '),
             (Token.Operator, '%'),
             (Token.Keyword, 'R'),
             (Token.Text, ' function () {}\n'),
         ]
         self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
+
+        fragment_2 = '\t%%xyz\n$foo\n'
+        tokens_2 = [
+            (Token.Text, '\t'),
+            (Token.Operator, '%%'),
+            (Token.Keyword, 'xyz'),
+            (Token.Text, '\n$foo\n'),
+        ]
+        self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
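
The added assertions (the `+` lines above) exercise the new cell-magic rule
end to end: the leading tab comes out as Text, `%%` as Operator, the magic
name `xyz` as Keyword, and the rest of the cell, including `$foo`, as plain
Text rather than a shell variable.

For completeness, the lexers under test can be driven directly, without
registering anything with Pygments (the module docstring notes that IPyLexer
is probably the only lexer worth registering explicitly). A usage sketch,
under the same assumed import path as above:

    from pygments import highlight
    from pygments.formatters import TerminalFormatter

    # Assumed import path; adjust to your checkout.
    from IPython.nbconvert.utils.lexers import IPyLexer

    session = (
        "In [1]: a = 'foo'\n"
        "\n"
        "In [2]: a\n"
        "Out[2]: 'foo'\n"
    )

    # IPyLexer sees the 'In [1]:' prompt and hands the whole text to
    # IPythonConsoleLexer; input without a prompt would fall back to the
    # plain IPythonLexer.
    print(highlight(session, IPyLexer(), TerminalFormatter()))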