upstream/ipython Commit - r20120:73c2656d

1

# -*- coding: utf-8 -*-

1

# -*- coding: utf-8 -*-

2

"""

2

"""

3

Defines a variety of Pygments lexers for highlighting IPython code.

3

Defines a variety of Pygments lexers for highlighting IPython code.

4

5

This includes:

5

This includes:

6

7

IPythonLexer, IPython3Lexer

7

IPythonLexer, IPython3Lexer

8

Lexers for pure IPython (python + magic/shell commands)

8

Lexers for pure IPython (python + magic/shell commands)

9

10

IPythonPartialTracebackLexer, IPythonTracebackLexer

10

IPythonPartialTracebackLexer, IPythonTracebackLexer

11

Supports 2.x and 3.x via keyword `python3`. The partial traceback

11

Supports 2.x and 3.x via keyword `python3`. The partial traceback

12

lexer reads everything but the Python code appearing in a traceback.

12

lexer reads everything but the Python code appearing in a traceback.

13

The full lexer combines the partial lexer with an IPython lexer.

13

The full lexer combines the partial lexer with an IPython lexer.

14

15

IPythonConsoleLexer

15

IPythonConsoleLexer

16

A lexer for IPython console sessions, with support for tracebacks.

16

A lexer for IPython console sessions, with support for tracebacks.

17

18

IPyLexer

18

IPyLexer

19

A friendly lexer which examines the first line of text and from it,

19

A friendly lexer which examines the first line of text and from it,

20

decides whether to use an IPython lexer or an IPython console lexer.

20

decides whether to use an IPython lexer or an IPython console lexer.

21

This is probably the only lexer that needs to be explicitly added

21

This is probably the only lexer that needs to be explicitly added

22

to Pygments.

22

to Pygments.

23

24

"""

24

"""

25

#-----------------------------------------------------------------------------

25

#-----------------------------------------------------------------------------

26

27

#

27

#

28

# Distributed under the terms of the Modified BSD License.

28

# Distributed under the terms of the Modified BSD License.

29

#

29

#

30

# The full license is in the file COPYING.txt, distributed with this software.

30

# The full license is in the file COPYING.txt, distributed with this software.

31

#-----------------------------------------------------------------------------

31

#-----------------------------------------------------------------------------

32

33

# Standard library

33

# Standard library

34

import re

34

import re

35

36

# Third party

36

# Third party

37

from pygments.lexers import BashLexer, PythonLexer, Python3Lexer

37

from pygments.lexers import BashLexer, PythonLexer, Python3Lexer

38

from pygments.lexer import (

38

from pygments.lexer import (

39

Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,

39

Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,

40

)

40

)

41

from pygments.token import (

41

from pygments.token import (

42

Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,

42

Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,

43

)

43

)

44

from pygments.util import get_bool_opt

44

from pygments.util import get_bool_opt

45

46

# Local

46

# Local

47

from IPython.testing.skipdoctest import skip_doctest

47

from IPython.testing.skipdoctest import skip_doctest

48

49

line_re = re.compile('.*?\n')

49

line_re = re.compile('.*?\n')

50

51

ipython_tokens = [

51

ipython_tokens = [

52

(r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),

52

(r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),

53

(r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,

53

(r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,

54

using(BashLexer), Text)),

54

using(BashLexer), Text)),

55

(r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),

55

(r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),

56

(r'^(!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),

56

(r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),

57

(r'^(.+)(=)(\s*)(!)(.+)(\n)', bygroups(

57

(r'(!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),

58

# With the limited syntax allowed on the l.h.s. of a shell capture,

59

# we don't need to differentiate between Python 2 and 3.

60

using(Python3Lexer), Operator, Text, Operator, using(BashLexer), Text)),

61

]

58

]

62

59

63

def build_ipy_lexer(python3):

60

def build_ipy_lexer(python3):

64

"""Builds IPython lexers depending on the value of `python3`.

61

"""Builds IPython lexers depending on the value of `python3`.

65

62

66

The lexer inherits from an appropriate Python lexer and then adds

63

The lexer inherits from an appropriate Python lexer and then adds

67

information about IPython specific keywords (i.e. magic commands,

64

information about IPython specific keywords (i.e. magic commands,

68

shell commands, etc.)

65

shell commands, etc.)

69

66

70

Parameters

67

Parameters

71

----------

68

----------

72

python3 : bool

69

python3 : bool

73

If `True`, then build an IPython lexer from a Python 3 lexer.

70

If `True`, then build an IPython lexer from a Python 3 lexer.

74

71

75

"""

72

"""

76

# It would be nice to have a single IPython lexer class which takes

73

# It would be nice to have a single IPython lexer class which takes

77

# a boolean `python3`. But since there are two Python lexer classes,

74

# a boolean `python3`. But since there are two Python lexer classes,

78

# we will also have two IPython lexer classes.

75

# we will also have two IPython lexer classes.

79

if python3:

76

if python3:

80

PyLexer = Python3Lexer

77

PyLexer = Python3Lexer

81

clsname = 'IPython3Lexer'

78

clsname = 'IPython3Lexer'

82

name = 'IPython3'

79

name = 'IPython3'

83

aliases = ['ipython3']

80

aliases = ['ipython3']

84

doc = """IPython3 Lexer"""

81

doc = """IPython3 Lexer"""

85

else:

82

else:

86

PyLexer = PythonLexer

83

PyLexer = PythonLexer

87

clsname = 'IPythonLexer'

84

clsname = 'IPythonLexer'

88

name = 'IPython'

85

name = 'IPython'

89

aliases = ['ipython2', 'ipython']

86

aliases = ['ipython2', 'ipython']

90

doc = """IPython Lexer"""

87

doc = """IPython Lexer"""

91

88

92

tokens = PyLexer.tokens.copy()

89

tokens = PyLexer.tokens.copy()

93

tokens['root'] = ipython_tokens + tokens['root']

90

tokens['root'] = ipython_tokens + tokens['root']

94

91

95

attrs = {'name': name, 'aliases': aliases,

92

attrs = {'name': name, 'aliases': aliases,

96

'__doc__': doc, 'tokens': tokens}

93

'__doc__': doc, 'tokens': tokens}

97

94

98

return type(name, (PyLexer,), attrs)

95

return type(name, (PyLexer,), attrs)

99

96

100

97

101

IPython3Lexer = build_ipy_lexer(python3=True)

98

IPython3Lexer = build_ipy_lexer(python3=True)

102

IPythonLexer = build_ipy_lexer(python3=False)

99

IPythonLexer = build_ipy_lexer(python3=False)

103

100

104

101

105

class IPythonPartialTracebackLexer(RegexLexer):

102

class IPythonPartialTracebackLexer(RegexLexer):

106

"""

103

"""

107

Partial lexer for IPython tracebacks.

104

Partial lexer for IPython tracebacks.

108

105

109

Handles all the non-python output. This works for both Python 2.x and 3.x.

106

Handles all the non-python output. This works for both Python 2.x and 3.x.

110

107

111

"""

108

"""

112

name = 'IPython Partial Traceback'

109

name = 'IPython Partial Traceback'

113

110

114

tokens = {

111

tokens = {

115

'root': [

112

'root': [

116

# Tracebacks for syntax errors have a different style.

113

# Tracebacks for syntax errors have a different style.

117

# For both types of tracebacks, we mark the first line with

114

# For both types of tracebacks, we mark the first line with

118

# Generic.Traceback. For syntax errors, we mark the filename

115

# Generic.Traceback. For syntax errors, we mark the filename

119

# as we mark the filenames for non-syntax tracebacks.

116

# as we mark the filenames for non-syntax tracebacks.

120

#

117

#

121

# These two regexps define how IPythonConsoleLexer finds a

118

# These two regexps define how IPythonConsoleLexer finds a

122

# traceback.

119

# traceback.

123

#

120

#

124

## Non-syntax traceback

121

## Non-syntax traceback

125

(r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),

122

(r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),

126

## Syntax traceback

123

## Syntax traceback

127

(r'^( File)(.*)(, line )(\d+\n)',

124

(r'^( File)(.*)(, line )(\d+\n)',

128

bygroups(Generic.Traceback, Name.Namespace,

125

bygroups(Generic.Traceback, Name.Namespace,

129

Generic.Traceback, Literal.Number.Integer)),

126

Generic.Traceback, Literal.Number.Integer)),

130

127

131

# (Exception Identifier)(Whitespace)(Traceback Message)

128

# (Exception Identifier)(Whitespace)(Traceback Message)

132

(r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',

129

(r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',

133

bygroups(Name.Exception, Generic.Whitespace, Text)),

130

bygroups(Name.Exception, Generic.Whitespace, Text)),

134

# (Module/Filename)(Text)(Callee)(Function Signature)

131

# (Module/Filename)(Text)(Callee)(Function Signature)

135

# Better options for callee and function signature?

132

# Better options for callee and function signature?

136

(r'(.*)( in )(.*)(\(.*\)\n)',

133

(r'(.*)( in )(.*)(\(.*\)\n)',

137

bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),

134

bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),

138

# Regular line: (Whitespace)(Line Number)(Python Code)

135

# Regular line: (Whitespace)(Line Number)(Python Code)

139

(r'(\s*?)(\d+)(.*?\n)',

136

(r'(\s*?)(\d+)(.*?\n)',

140

bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),

137

bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),

141

# Emphasized line: (Arrow)(Line Number)(Python Code)

138

# Emphasized line: (Arrow)(Line Number)(Python Code)

142

# Using Exception token so arrow color matches the Exception.

139

# Using Exception token so arrow color matches the Exception.

143

(r'(-*>?\s?)(\d+)(.*?\n)',

140

(r'(-*>?\s?)(\d+)(.*?\n)',

144

bygroups(Name.Exception, Literal.Number.Integer, Other)),

141

bygroups(Name.Exception, Literal.Number.Integer, Other)),

145

# (Exception Identifier)(Message)

142

# (Exception Identifier)(Message)

146

(r'(?u)(^[^\d\W]\w*)(:.*?\n)',

143

(r'(?u)(^[^\d\W]\w*)(:.*?\n)',

147

bygroups(Name.Exception, Text)),

144

bygroups(Name.Exception, Text)),

148

# Tag everything else as Other, will be handled later.

145

# Tag everything else as Other, will be handled later.

149

(r'.*\n', Other),

146

(r'.*\n', Other),

150

],

147

],

151

}

148

}

152

149

153

150

154

class IPythonTracebackLexer(DelegatingLexer):

151

class IPythonTracebackLexer(DelegatingLexer):

155

"""

152

"""

156

IPython traceback lexer.

153

IPython traceback lexer.

157

154

158

For doctests, the tracebacks can be snipped as much as desired with the

155

For doctests, the tracebacks can be snipped as much as desired with the

159

exception of the lines that designate a traceback. For non-syntax error

156

exception of the lines that designate a traceback. For non-syntax error

160

tracebacks, this is the line of hyphens. For syntax error tracebacks,

157

tracebacks, this is the line of hyphens. For syntax error tracebacks,

161

this is the line which lists the File and line number.

158

this is the line which lists the File and line number.

162

159

163

"""

160

"""

164

# The lexer inherits from DelegatingLexer. The "root" lexer is an

161

# The lexer inherits from DelegatingLexer. The "root" lexer is an

165

# appropriate IPython lexer, which depends on the value of the boolean

162

# appropriate IPython lexer, which depends on the value of the boolean

166

# `python3`. First, we parse with the partial IPython traceback lexer.

163

# `python3`. First, we parse with the partial IPython traceback lexer.

167

# Then, any code marked with the "Other" token is delegated to the root

164

# Then, any code marked with the "Other" token is delegated to the root

168

# lexer.

165

# lexer.

169

#

166

#

170

name = 'IPython Traceback'

167

name = 'IPython Traceback'

171

aliases = ['ipythontb']

168

aliases = ['ipythontb']

172

169

173

def __init__(self, **options):

170

def __init__(self, **options):

174

self.python3 = get_bool_opt(options, 'python3', False)

171

self.python3 = get_bool_opt(options, 'python3', False)

175

if self.python3:

172

if self.python3:

176

self.aliases = ['ipython3tb']

173

self.aliases = ['ipython3tb']

177

else:

174

else:

178

self.aliases = ['ipython2tb', 'ipythontb']

175

self.aliases = ['ipython2tb', 'ipythontb']

179

176

180

if self.python3:

177

if self.python3:

181

IPyLexer = IPython3Lexer

178

IPyLexer = IPython3Lexer

182

else:

179

else:

183

IPyLexer = IPythonLexer

180

IPyLexer = IPythonLexer

184

181

185

DelegatingLexer.__init__(self, IPyLexer,

182

DelegatingLexer.__init__(self, IPyLexer,

186

IPythonPartialTracebackLexer, **options)

183

IPythonPartialTracebackLexer, **options)

187

184

188

@skip_doctest

185

@skip_doctest

189

class IPythonConsoleLexer(Lexer):

186

class IPythonConsoleLexer(Lexer):

190

"""

187

"""

191

An IPython console lexer for IPython code-blocks and doctests, such as:

188

An IPython console lexer for IPython code-blocks and doctests, such as:

192

189

193

.. code-block:: rst

190

.. code-block:: rst

194

191

195

.. code-block:: ipythonconsole

192

.. code-block:: ipythonconsole

196

193

197

In [1]: a = 'foo'

194

In [1]: a = 'foo'

198

195

199

In [2]: a

196

In [2]: a

200

Out[2]: 'foo'

197

Out[2]: 'foo'

201

198

202

In [3]: print a

199

In [3]: print a

203

foo

200

foo

204

201

205

In [4]: 1 / 0

202

In [4]: 1 / 0

206

203

207

204

208

Support is also provided for IPython exceptions:

205

Support is also provided for IPython exceptions:

209

206

210

.. code-block:: rst

207

.. code-block:: rst

211

208

212

.. code-block:: ipythonconsole

209

.. code-block:: ipythonconsole

213

210

214

In [1]: raise Exception

211

In [1]: raise Exception

215

212

216

---------------------------------------------------------------------------

213

---------------------------------------------------------------------------

217

Exception Traceback (most recent call last)

214

Exception Traceback (most recent call last)

218

<ipython-input-1-fca2ab0ca76b> in <module>()

215

<ipython-input-1-fca2ab0ca76b> in <module>()

219

----> 1 raise Exception

216

----> 1 raise Exception

220

217

221

Exception:

218

Exception:

222

219

223

"""

220

"""

224

name = 'IPython console session'

221

name = 'IPython console session'

225

aliases = ['ipythonconsole']

222

aliases = ['ipythonconsole']

226

mimetypes = ['text/x-ipython-console']

223

mimetypes = ['text/x-ipython-console']

227

224

228

# The regexps used to determine what is input and what is output.

225

# The regexps used to determine what is input and what is output.

229

# The default prompts for IPython are:

226

# The default prompts for IPython are:

230

#

227

#

231

# c.PromptManager.in_template = 'In [\#]: '

228

# c.PromptManager.in_template = 'In [\#]: '

232

# c.PromptManager.in2_template = ' .\D.: '

229

# c.PromptManager.in2_template = ' .\D.: '

233

# c.PromptManager.out_template = 'Out[\#]: '

230

# c.PromptManager.out_template = 'Out[\#]: '

234

#

231

#

235

in1_regex = r'In \[[0-9]+\]: '

232

in1_regex = r'In \[[0-9]+\]: '

236

in2_regex = r' \.\.+\.: '

233

in2_regex = r' \.\.+\.: '

237

out_regex = r'Out\[[0-9]+\]: '

234

out_regex = r'Out\[[0-9]+\]: '

238

235

239

#: The regex to determine when a traceback starts.

236

#: The regex to determine when a traceback starts.

240

ipytb_start = re.compile(r'^(\^C)?(-+\n)|^( File)(.*)(, line )(\d+\n)')

237

ipytb_start = re.compile(r'^(\^C)?(-+\n)|^( File)(.*)(, line )(\d+\n)')

241

238

242

def __init__(self, **options):

239

def __init__(self, **options):

243

"""Initialize the IPython console lexer.

240

"""Initialize the IPython console lexer.

244

241

245

Parameters

242

Parameters

246

----------

243

----------

247

python3 : bool

244

python3 : bool

248

If `True`, then the console inputs are parsed using a Python 3

245

If `True`, then the console inputs are parsed using a Python 3

249

lexer. Otherwise, they are parsed using a Python 2 lexer.

246

lexer. Otherwise, they are parsed using a Python 2 lexer.

250

in1_regex : RegexObject

247

in1_regex : RegexObject

251

The compiled regular expression used to detect the start

248

The compiled regular expression used to detect the start

252

of inputs. Although the IPython configuration setting may have a

249

of inputs. Although the IPython configuration setting may have a

253

trailing whitespace, do not include it in the regex. If `None`,

250

trailing whitespace, do not include it in the regex. If `None`,

254

then the default input prompt is assumed.

251

then the default input prompt is assumed.

255

in2_regex : RegexObject

252

in2_regex : RegexObject

256

The compiled regular expression used to detect the continuation

253

The compiled regular expression used to detect the continuation

257

of inputs. Although the IPython configuration setting may have a

254

of inputs. Although the IPython configuration setting may have a

258

trailing whitespace, do not include it in the regex. If `None`,

255

trailing whitespace, do not include it in the regex. If `None`,

259

then the default input prompt is assumed.

256

then the default input prompt is assumed.

260

out_regex : RegexObject

257

out_regex : RegexObject

261

The compiled regular expression used to detect outputs. If `None`,

258

The compiled regular expression used to detect outputs. If `None`,

262

then the default output prompt is assumed.

259

then the default output prompt is assumed.

263

260

264

"""

261

"""

265

self.python3 = get_bool_opt(options, 'python3', False)

262

self.python3 = get_bool_opt(options, 'python3', False)

266

if self.python3:

263

if self.python3:

267

self.aliases = ['ipython3console']

264

self.aliases = ['ipython3console']

268

else:

265

else:

269

self.aliases = ['ipython2console', 'ipythonconsole']

266

self.aliases = ['ipython2console', 'ipythonconsole']

270

267

271

in1_regex = options.get('in1_regex', self.in1_regex)

268

in1_regex = options.get('in1_regex', self.in1_regex)

272

in2_regex = options.get('in2_regex', self.in2_regex)

269

in2_regex = options.get('in2_regex', self.in2_regex)

273

out_regex = options.get('out_regex', self.out_regex)

270

out_regex = options.get('out_regex', self.out_regex)

274

271

275

# So that we can work with input and output prompts which have been

272

# So that we can work with input and output prompts which have been

276

# rstrip'd (possibly by editors) we also need rstrip'd variants. If

273

# rstrip'd (possibly by editors) we also need rstrip'd variants. If

277

# we do not do this, then such prompts will be tagged as 'output'.

274

# we do not do this, then such prompts will be tagged as 'output'.

278

# The reason we can't just use the rstrip'd variants instead is that

275

# The reason we can't just use the rstrip'd variants instead is that

279

# we want any whitespace associated with the prompt to be inserted

276

# we want any whitespace associated with the prompt to be inserted

280

# with the token. This allows formatted code to be modified so as to hide

277

# with the token. This allows formatted code to be modified so as to hide

281

# the appearance of prompts, with the whitespace included. One example

278

# the appearance of prompts, with the whitespace included. One example

282

# use of this is in copybutton.js from the standard lib Python docs.

279

# use of this is in copybutton.js from the standard lib Python docs.

283

in1_regex_rstrip = in1_regex.rstrip() + '\n'

280

in1_regex_rstrip = in1_regex.rstrip() + '\n'

284

in2_regex_rstrip = in2_regex.rstrip() + '\n'

281

in2_regex_rstrip = in2_regex.rstrip() + '\n'

285

out_regex_rstrip = out_regex.rstrip() + '\n'

282

out_regex_rstrip = out_regex.rstrip() + '\n'

286

283

287

# Compile and save them all.

284

# Compile and save them all.

288

attrs = ['in1_regex', 'in2_regex', 'out_regex',

285

attrs = ['in1_regex', 'in2_regex', 'out_regex',

289

'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']

286

'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']

290

for attr in attrs:

287

for attr in attrs:

291

self.__setattr__(attr, re.compile(locals()[attr]))

288

self.__setattr__(attr, re.compile(locals()[attr]))

292

289

293

Lexer.__init__(self, **options)

290

Lexer.__init__(self, **options)

294

291

295

if self.python3:

292

if self.python3:

296

pylexer = IPython3Lexer

293

pylexer = IPython3Lexer

297

tblexer = IPythonTracebackLexer

294

tblexer = IPythonTracebackLexer

298

else:

295

else:

299

pylexer = IPythonLexer

296

pylexer = IPythonLexer

300

tblexer = IPythonTracebackLexer

297

tblexer = IPythonTracebackLexer

301

298

302

self.pylexer = pylexer(**options)

299

self.pylexer = pylexer(**options)

303

self.tblexer = tblexer(**options)

300

self.tblexer = tblexer(**options)

304

301

305

self.reset()

302

self.reset()

306

303

307

def reset(self):

304

def reset(self):

308

self.mode = 'output'

305

self.mode = 'output'

309

self.index = 0

306

self.index = 0

310

self.buffer = u''

307

self.buffer = u''

311

self.insertions = []

308

self.insertions = []

312

309

313

def buffered_tokens(self):

310

def buffered_tokens(self):

314

"""

311

"""

315

Generator of unprocessed tokens after doing insertions and before

312

Generator of unprocessed tokens after doing insertions and before

316

changing to a new state.

313

changing to a new state.

317

314

318

"""

315

"""

319

if self.mode == 'output':

316

if self.mode == 'output':

320

tokens = [(0, Generic.Output, self.buffer)]

317

tokens = [(0, Generic.Output, self.buffer)]

321

elif self.mode == 'input':

318

elif self.mode == 'input':

322

tokens = self.pylexer.get_tokens_unprocessed(self.buffer)

319

tokens = self.pylexer.get_tokens_unprocessed(self.buffer)

323

else: # traceback

320

else: # traceback

324

tokens = self.tblexer.get_tokens_unprocessed(self.buffer)

321

tokens = self.tblexer.get_tokens_unprocessed(self.buffer)

325

322

326

for i, t, v in do_insertions(self.insertions, tokens):

323

for i, t, v in do_insertions(self.insertions, tokens):

327

# All token indexes are relative to the buffer.

324

# All token indexes are relative to the buffer.

328

yield self.index + i, t, v

325

yield self.index + i, t, v

329

326

330

# Clear it all

327

# Clear it all

331

self.index += len(self.buffer)

328

self.index += len(self.buffer)

332

self.buffer = u''

329

self.buffer = u''

333

self.insertions = []

330

self.insertions = []

334

331

335

def get_mci(self, line):

332

def get_mci(self, line):

336

"""

333

"""

337

Parses the line and returns a 3-tuple: (mode, code, insertion).

334

Parses the line and returns a 3-tuple: (mode, code, insertion).

338

335

339

`mode` is the next mode (or state) of the lexer, and is always equal

336

`mode` is the next mode (or state) of the lexer, and is always equal

340

to 'input', 'output', or 'tb'.

337

to 'input', 'output', or 'tb'.

341

338

342

`code` is a portion of the line that should be added to the buffer

339

`code` is a portion of the line that should be added to the buffer

343

corresponding to the next mode and eventually lexed by another lexer.

340

corresponding to the next mode and eventually lexed by another lexer.

344

For example, `code` could be Python code if `mode` were 'input'.

341

For example, `code` could be Python code if `mode` were 'input'.

345

342

346

`insertion` is a 3-tuple (index, token, text) representing an

343

`insertion` is a 3-tuple (index, token, text) representing an

347

unprocessed "token" that will be inserted into the stream of tokens

344

unprocessed "token" that will be inserted into the stream of tokens

348

that are created from the buffer once we change modes. This is usually

345

that are created from the buffer once we change modes. This is usually

349

the input or output prompt.

346

the input or output prompt.

350

347

351

In general, the next mode depends on current mode and on the contents

348

In general, the next mode depends on current mode and on the contents

352

of `line`.

349

of `line`.

353

350

354

"""

351

"""

355

# To reduce the number of regex match checks, we have multiple

352

# To reduce the number of regex match checks, we have multiple

356

# 'if' blocks instead of 'if-elif' blocks.

353

# 'if' blocks instead of 'if-elif' blocks.

357

354

358

# Check for possible end of input

355

# Check for possible end of input

359

in2_match = self.in2_regex.match(line)

356

in2_match = self.in2_regex.match(line)

360

in2_match_rstrip = self.in2_regex_rstrip.match(line)

357

in2_match_rstrip = self.in2_regex_rstrip.match(line)

361

if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \

358

if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \

362

in2_match_rstrip:

359

in2_match_rstrip:

363

end_input = True

360

end_input = True

364

else:

361

else:

365

end_input = False

362

end_input = False

366

if end_input and self.mode != 'tb':

363

if end_input and self.mode != 'tb':

367

# Only look for an end of input when not in tb mode.

364

# Only look for an end of input when not in tb mode.

368

# An ellipsis could appear within the traceback.

365

# An ellipsis could appear within the traceback.

369

mode = 'output'

366

mode = 'output'

370

code = u''

367

code = u''

371

insertion = (0, Generic.Prompt, line)

368

insertion = (0, Generic.Prompt, line)

372

return mode, code, insertion

369

return mode, code, insertion

373

370

374

# Check for output prompt

371

# Check for output prompt

375

out_match = self.out_regex.match(line)

372

out_match = self.out_regex.match(line)

376

out_match_rstrip = self.out_regex_rstrip.match(line)

373

out_match_rstrip = self.out_regex_rstrip.match(line)

377

if out_match or out_match_rstrip:

374

if out_match or out_match_rstrip:

378

mode = 'output'

375

mode = 'output'

379

if out_match:

376

if out_match:

380

idx = out_match.end()

377

idx = out_match.end()

381

else:

378

else:

382

idx = out_match_rstrip.end()

379

idx = out_match_rstrip.end()

383

code = line[idx:]

380

code = line[idx:]

384

# Use the 'heading' token for output. We cannot use Generic.Error

381

# Use the 'heading' token for output. We cannot use Generic.Error

385

# since it would conflict with exceptions.

382

# since it would conflict with exceptions.

386

insertion = (0, Generic.Heading, line[:idx])

383

insertion = (0, Generic.Heading, line[:idx])

387

return mode, code, insertion

384

return mode, code, insertion

388

385

389

386

390

# Check for input or continuation prompt (non stripped version)

387

# Check for input or continuation prompt (non stripped version)

391

in1_match = self.in1_regex.match(line)

388

in1_match = self.in1_regex.match(line)

392

if in1_match or (in2_match and self.mode != 'tb'):

389

if in1_match or (in2_match and self.mode != 'tb'):

393

# New input or when not in tb, continued input.

390

# New input or when not in tb, continued input.

394

# We do not check for continued input when in tb since it is

391

# We do not check for continued input when in tb since it is

395

# allowable to replace a long stack with an ellipsis.

392

# allowable to replace a long stack with an ellipsis.

396

mode = 'input'

393

mode = 'input'

397

if in1_match:

394

if in1_match:

398

idx = in1_match.end()

395

idx = in1_match.end()

399

else: # in2_match

396

else: # in2_match

400

idx = in2_match.end()

397

idx = in2_match.end()

401

code = line[idx:]

398

code = line[idx:]

402

insertion = (0, Generic.Prompt, line[:idx])

399

insertion = (0, Generic.Prompt, line[:idx])

403

return mode, code, insertion

400

return mode, code, insertion

404

401

405

# Check for input or continuation prompt (stripped version)

402

# Check for input or continuation prompt (stripped version)

406

in1_match_rstrip = self.in1_regex_rstrip.match(line)

403

in1_match_rstrip = self.in1_regex_rstrip.match(line)

407

if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):

404

if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):

408

# New input or when not in tb, continued input.

405

# New input or when not in tb, continued input.

409

# We do not check for continued input when in tb since it is

406

# We do not check for continued input when in tb since it is

410

# allowable to replace a long stack with an ellipsis.

407

# allowable to replace a long stack with an ellipsis.

411

mode = 'input'

408

mode = 'input'

412

if in1_match_rstrip:

409

if in1_match_rstrip:

413

idx = in1_match_rstrip.end()

410

idx = in1_match_rstrip.end()

414

else: # in2_match

411

else: # in2_match

415

idx = in2_match_rstrip.end()

412

idx = in2_match_rstrip.end()

416

code = line[idx:]

413

code = line[idx:]

417

insertion = (0, Generic.Prompt, line[:idx])

414

insertion = (0, Generic.Prompt, line[:idx])

418

return mode, code, insertion

415

return mode, code, insertion

419

416

420

# Check for traceback

417

# Check for traceback

421

if self.ipytb_start.match(line):

418

if self.ipytb_start.match(line):

422

mode = 'tb'

419

mode = 'tb'

423

code = line

420

code = line

424

insertion = None

421

insertion = None

425

return mode, code, insertion

422

return mode, code, insertion

426

423

427

# All other stuff...

424

# All other stuff...

428

if self.mode in ('input', 'output'):

425

if self.mode in ('input', 'output'):

429

# We assume all other text is output. Multiline input that

426

# We assume all other text is output. Multiline input that

430

# does not use the continuation marker cannot be detected.

427

# does not use the continuation marker cannot be detected.

431

# For example, the 3 in the following is clearly output:

428

# For example, the 3 in the following is clearly output:

432

#

429

#

433

# In [1]: print 3

430

# In [1]: print 3

434

# 3

431

# 3

435

#

432

#

436

# But the following second line is part of the input:

433

# But the following second line is part of the input:

437

#

434

#

438

# In [2]: while True:

435

# In [2]: while True:

439

# print True

436

# print True

440

#

437

#

441

# In both cases, the 2nd line will be 'output'.

438

# In both cases, the 2nd line will be 'output'.

442

#

439

#

443

mode = 'output'

440

mode = 'output'

444

else:

441

else:

445

mode = 'tb'

442

mode = 'tb'

446

443

447

code = line

444

code = line

448

insertion = None

445

insertion = None

449

446

450

return mode, code, insertion

447

return mode, code, insertion

451

448

452

def get_tokens_unprocessed(self, text):

449

def get_tokens_unprocessed(self, text):

453

self.reset()

450

self.reset()

454

for match in line_re.finditer(text):

451

for match in line_re.finditer(text):

455

line = match.group()

452

line = match.group()

456

mode, code, insertion = self.get_mci(line)

453

mode, code, insertion = self.get_mci(line)

457

454

458

if mode != self.mode:

455

if mode != self.mode:

459

# Yield buffered tokens before transitioning to new mode.

456

# Yield buffered tokens before transitioning to new mode.

460

for token in self.buffered_tokens():

457

for token in self.buffered_tokens():

461

yield token

458

yield token

462

self.mode = mode

459

self.mode = mode

463

460

464

if insertion:

461

if insertion:

465

self.insertions.append((len(self.buffer), [insertion]))

462

self.insertions.append((len(self.buffer), [insertion]))

466

self.buffer += code

463

self.buffer += code

467

else:

464

else:

468

for token in self.buffered_tokens():

465

for token in self.buffered_tokens():

469

yield token

466

yield token

470

467

471

class IPyLexer(Lexer):

468

class IPyLexer(Lexer):

472

"""

469

"""

473

Primary lexer for all IPython-like code.

470

Primary lexer for all IPython-like code.

474

471

475

This is a simple helper lexer. If the first line of the text begins with

472

This is a simple helper lexer. If the first line of the text begins with

476

"In \[[0-9]+\]:", then the entire text is parsed with an IPython console

473

"In \[[0-9]+\]:", then the entire text is parsed with an IPython console

477

lexer. If not, then the entire text is parsed with an IPython lexer.

474

lexer. If not, then the entire text is parsed with an IPython lexer.

478

475

479

The goal is to reduce the number of lexers that are registered

476

The goal is to reduce the number of lexers that are registered

480

with Pygments.

477

with Pygments.

481

478

482

"""

479

"""

483

name = 'IPy session'

480

name = 'IPy session'

484

aliases = ['ipy']

481

aliases = ['ipy']

485

482

486

def __init__(self, **options):

483

def __init__(self, **options):

487

self.python3 = get_bool_opt(options, 'python3', False)

484

self.python3 = get_bool_opt(options, 'python3', False)

488

if self.python3:

485

if self.python3:

489

self.aliases = ['ipy3']

486

self.aliases = ['ipy3']

490

else:

487

else:

491

self.aliases = ['ipy2', 'ipy']

488

self.aliases = ['ipy2', 'ipy']

492

489

493

Lexer.__init__(self, **options)

490

Lexer.__init__(self, **options)

494

491

495

self.IPythonLexer = IPythonLexer(**options)

492

self.IPythonLexer = IPythonLexer(**options)

496

self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

493

self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

497

494

498

def get_tokens_unprocessed(self, text):

495

def get_tokens_unprocessed(self, text):

499

# Search for the input prompt anywhere...this allows code blocks to

496

# Search for the input prompt anywhere...this allows code blocks to

500

# begin with comments as well.

497

# begin with comments as well.

501

if re.match(r'.*(In \[[0-9]+\]:)', text.strip(), re.DOTALL):

498

if re.match(r'.*(In \[[0-9]+\]:)', text.strip(), re.DOTALL):

502

lex = self.IPythonConsoleLexer

499

lex = self.IPythonConsoleLexer

503

else:

500

else:

504

lex = self.IPythonLexer

501

lex = self.IPythonLexer

505

for token in lex.get_tokens_unprocessed(text):

502

for token in lex.get_tokens_unprocessed(text):

506

yield token

503

yield token

507

504

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # -*- coding: utf-8 -*-
             """
             Defines a variety of Pygments lexers for highlighting IPython code.
             This includes:
                 IPythonLexer, IPython3Lexer
                     Lexers for pure IPython (python + magic/shell commands)
                 IPythonPartialTracebackLexer, IPythonTracebackLexer
                     Supports 2.x and 3.x via keyword `python3`.  The partial traceback
                     lexer reads everything but the Python code appearing in a traceback.
                     The full lexer combines the partial lexer with an IPython lexer.
                 IPythonConsoleLexer
                     A lexer for IPython console sessions, with support for tracebacks.
                 IPyLexer
                     A friendly lexer which examines the first line of text and from it,
                     decides whether to use an IPython lexer or an IPython console lexer.
                     This is probably the only lexer that needs to be explicitly added
                     to Pygments.
             """
             #-----------------------------------------------------------------------------
             # Copyright (c) 2013, the IPython Development Team.
             #
             # Distributed under the terms of the Modified BSD License.
             #
             # The full license is in the file COPYING.txt, distributed with this software.
             #-----------------------------------------------------------------------------
             # Standard library
             import re
             # Third party
             from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
             from pygments.lexer import (
                 Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
             )
             from pygments.token import (
                 Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
             )
             from pygments.util import get_bool_opt
             # Local
             from IPython.testing.skipdoctest import skip_doctest
             line_re = re.compile('.*?\n')
             ipython_tokens = [
               (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
               (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
                                                    using(BashLexer), Text)),
               (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
-              (r'^(!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
+              (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
-              (r'^(.+)(=)(\s*)(!)(.+)(\n)', bygroups(
+              (r'(!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
-                  # With the limited syntax allowed on the l.h.s. of a shell capture,
-                  # we don't need to differentiate between Python 2 and 3.
-                  using(Python3Lexer), Operator, Text, Operator, using(BashLexer), Text)),
             ]
             def build_ipy_lexer(python3):
                 """Create an IPython lexer class on top of a Pygments Python lexer.

                 The generated class subclasses either ``Python3Lexer`` or
                 ``PythonLexer`` and places the IPython specific rules from
                 ``ipython_tokens`` (magic commands, shell commands, etc.) in front
                 of the base lexer's ``root`` rules.

                 Parameters
                 ----------
                 python3 : bool
                     If `True`, derive the lexer from the Python 3 lexer; otherwise
                     derive it from the Python 2 lexer.

                 Returns
                 -------
                 type
                     The newly created lexer class.
                 """
                 # There is one Pygments lexer class per Python flavour, so one
                 # IPython lexer class is generated per flavour as well, instead of
                 # a single class taking a `python3` flag.
                 if python3:
                     base, name, aliases = Python3Lexer, 'IPython3', ['ipython3']
                     doc = """IPython3 Lexer"""
                 else:
                     base, name, aliases = PythonLexer, 'IPython', ['ipython2', 'ipython']
                     doc = """IPython Lexer"""

                 # Copy the state table so that replacing 'root' below does not
                 # modify the dictionary owned by the base lexer.
                 tokens = base.tokens.copy()
                 tokens['root'] = ipython_tokens + tokens['root']

                 # NOTE: the class is created under `name` ('IPython' / 'IPython3'),
                 # which therefore also becomes its ``__name__``.
                 return type(name, (base,), {
                     'name': name,
                     'aliases': aliases,
                     '__doc__': doc,
                     'tokens': tokens,
                 })
             IPython3Lexer = build_ipy_lexer(python3=True)
             IPythonLexer = build_ipy_lexer(python3=False)
             class IPythonPartialTracebackLexer(RegexLexer):
                 """
                 Partial lexer for IPython tracebacks.

                 Handles all the non-python output. This works for both Python 2.x and 3.x.

                 Only the traceback "scaffolding" (separator lines, file/line headers,
                 exception names, line numbers, arrows) receives a specific token here.
                 Everything that may be Python source is tagged ``Other`` so that a
                 second lexer can process it afterwards.
                 """
                 name = 'IPython Partial Traceback'
                 # NOTE: the rules below are listed from most to least specific and end
                 # with a catch-all; Pygments' RegexLexer is documented to try rules in
                 # the order given, so reordering them changes the result.
                 tokens = {
                     'root': [
                         # Tracebacks for syntax errors have a different style.
                         # For both types of tracebacks, we mark the first line with
                         # Generic.Traceback.  For syntax errors, we mark the filename
                         # as we mark the filenames for non-syntax tracebacks.
                         #
                         # These two regexps define how IPythonConsoleLexer finds a
                         # traceback.
                         # (The same two patterns are repeated, joined with '|', in
                         # IPythonConsoleLexer.ipytb_start -- keep them in sync.)
                         #
                         ## Non-syntax traceback
                         (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
                         ## Syntax traceback
                         (r'^(  File)(.*)(, line )(\d+\n)',
                          bygroups(Generic.Traceback, Name.Namespace,
                                   Generic.Traceback, Literal.Number.Integer)),
                         # (Exception Identifier)(Whitespace)(Traceback Message)
                         (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
                          bygroups(Name.Exception, Generic.Whitespace, Text)),
                         # (Module/Filename)(Text)(Callee)(Function Signature)
                         # Better options for callee and function signature?
                         (r'(.*)( in )(.*)(\(.*\)\n)',
                          bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
                         # Regular line: (Whitespace)(Line Number)(Python Code)
                         (r'(\s*?)(\d+)(.*?\n)',
                          bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
                         # Emphasized line: (Arrow)(Line Number)(Python Code)
                         # Using Exception token so arrow color matches the Exception.
                         (r'(-*>?\s?)(\d+)(.*?\n)',
                          bygroups(Name.Exception, Literal.Number.Integer, Other)),
                         # (Exception Identifier)(Message)
                         (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
                          bygroups(Name.Exception, Text)),
                         # Tag everything else as Other, will be handled later.
                         (r'.*\n', Other),
                     ],
                 }
             class IPythonTracebackLexer(DelegatingLexer):
                 """
                 IPython traceback lexer.

                 For doctests, a traceback may be shortened as much as desired, except
                 for the line that marks it as a traceback. For non-syntax error
                 tracebacks, that is the line of hyphens. For syntax error tracebacks,
                 it is the line which lists the File and line number.
                 """
                 # Two lexers are handed to DelegatingLexer.  The text is first parsed
                 # by the partial IPython traceback lexer; whatever it marks with the
                 # "Other" token is then delegated to the "root" lexer, which is the
                 # IPython lexer selected by the boolean option `python3`.
                 name = 'IPython Traceback'
                 aliases = ['ipythontb']

                 def __init__(self, **options):
                     """Choose the root lexer from the `python3` option and delegate."""
                     self.python3 = get_bool_opt(options, 'python3', False)
                     if self.python3:
                         self.aliases = ['ipython3tb']
                         root_lexer = IPython3Lexer
                     else:
                         self.aliases = ['ipython2tb', 'ipythontb']
                         root_lexer = IPythonLexer
                     DelegatingLexer.__init__(
                         self, root_lexer, IPythonPartialTracebackLexer, **options)
             @skip_doctest
             class IPythonConsoleLexer(Lexer):
                 """
                 An IPython console lexer for IPython code-blocks and doctests, such as:
                 .. code-block:: rst
                     .. code-block:: ipythonconsole
                         In [1]: a = 'foo'
                         In [2]: a
                         Out[2]: 'foo'
                         In [3]: print a
                         foo
                         In [4]: 1 / 0
                 Support is also provided for IPython exceptions:
                 .. code-block:: rst
                     .. code-block:: ipythonconsole
                         In [1]: raise Exception
                         ---------------------------------------------------------------------------
                         Exception                                 Traceback (most recent call last)
                         <ipython-input-1-fca2ab0ca76b> in <module>()
                         ----> 1 raise Exception
                         Exception:
                 """
                 name = 'IPython console session'
                 aliases = ['ipythonconsole']
                 mimetypes = ['text/x-ipython-console']
                 # The regexps used to determine what is input and what is output.
                 # The default prompts for IPython are:
                 #
                 #     c.PromptManager.in_template  = 'In [\#]: '
                 #     c.PromptManager.in2_template = '   .\D.: '
                 #     c.PromptManager.out_template = 'Out[\#]: '
                 #
                 in1_regex = r'In \[[0-9]+\]: '
                 in2_regex = r'   \.\.+\.: '
                 out_regex = r'Out\[[0-9]+\]: '
                 #: The regex to determine when a traceback starts.
                 ipytb_start = re.compile(r'^(\^C)?(-+\n)|^(  File)(.*)(, line )(\d+\n)')
                 def __init__(self, **options):
                     """Initialize the IPython console lexer.

                     Parameters
                     ----------
                     python3 : bool
                         If `True`, then the console inputs are parsed using a Python 3
                         lexer. Otherwise, they are parsed using a Python 2 lexer.
                     in1_regex : str
                         The regular expression *source* used to detect the start of
                         inputs. It must be a string rather than a compiled pattern,
                         because it is ``rstrip``'d and compiled here. If not given,
                         the class attribute `in1_regex` is used.
                     in2_regex : str
                         The regular expression source used to detect the continuation
                         of inputs; same conventions as `in1_regex`. If not given, the
                         class attribute `in2_regex` is used.
                     out_regex : str
                         The regular expression source used to detect outputs; same
                         conventions as `in1_regex`. If not given, the class attribute
                         `out_regex` is used.

                     After initialization, ``self.in1_regex``, ``self.in2_regex`` and
                     ``self.out_regex`` hold compiled patterns, and each has a
                     ``*_rstrip`` companion (see below).
                     """
                     self.python3 = get_bool_opt(options, 'python3', False)
                     if self.python3:
                         self.aliases = ['ipython3console']
                         pylexer = IPython3Lexer
                     else:
                         self.aliases = ['ipython2console', 'ipythonconsole']
                         pylexer = IPythonLexer

                     # So that we can work with input and output prompts which have been
                     # rstrip'd (possibly by editors) we also need rstrip'd variants. If
                     # we do not do this, then such prompts will be tagged as 'output'.
                     # The reason we can't just use the rstrip'd variants instead is
                     # that we want any whitespace associated with the prompt to be
                     # inserted with the token. This allows formatted code to be
                     # modified so as to hide the appearance of prompts, with the
                     # whitespace included. One example use of this is in copybutton.js
                     # from the standard lib Python docs.
                     for attr in ('in1_regex', 'in2_regex', 'out_regex'):
                         # At this point getattr() still sees the class-level default
                         # (a pattern string); the instance attribute set just below
                         # shadows it with the compiled pattern.
                         pattern = options.get(attr, getattr(self, attr))
                         setattr(self, attr, re.compile(pattern))
                         setattr(self, attr + '_rstrip',
                                 re.compile(pattern.rstrip() + '\n'))

                     Lexer.__init__(self, **options)

                     # Sub-lexers for the 'input' and 'tb' modes.  The traceback lexer
                     # receives the same options and so makes its own `python3` choice.
                     self.pylexer = pylexer(**options)
                     self.tblexer = IPythonTracebackLexer(**options)

                     self.reset()
                 def reset(self):
                     self.mode = 'output'
                     self.index = 0
                     self.buffer = u''
                     self.insertions = []
                 def buffered_tokens(self):
                     """
                     Lex the pending buffer, yield its tokens, then empty the buffer.

                     The buffer is turned into unprocessed tokens according to the
                     current mode: a single ``Generic.Output`` token in 'output' mode,
                     the input lexer in 'input' mode, and the traceback lexer otherwise.
                     Pending insertions (the prompts) are merged in, and every token
                     position is shifted by the amount of text already emitted.
                     """
                     if self.mode == 'output':
                         stream = [(0, Generic.Output, self.buffer)]
                     else:
                         # 'input' -> Python/IPython code, anything else -> traceback
                         sublexer = self.pylexer if self.mode == 'input' else self.tblexer
                         stream = sublexer.get_tokens_unprocessed(self.buffer)

                     # Positions coming out of do_insertions are relative to the
                     # buffer, so offset them by what has been consumed so far.
                     for pos, ttype, value in do_insertions(self.insertions, stream):
                         yield self.index + pos, ttype, value

                     # The buffer has been consumed: advance and start afresh.
                     self.index += len(self.buffer)
                     self.buffer = u''
                     self.insertions = []
                 def get_mci(self, line):
                     """
                     Parses the line and returns a 3-tuple: (mode, code, insertion).
                     `mode` is the next mode (or state) of the lexer, and is always equal
                     to 'input', 'output', or 'tb'.
                     `code` is a portion of the line that should be added to the buffer
                     corresponding to the next mode and eventually lexed by another lexer.
                     For example, `code` could be Python code if `mode` were 'input'.
                     `insertion` is a 3-tuple (index, token, text) representing an
                     unprocessed "token" that will be inserted into the stream of tokens
                     that are created from the buffer once we change modes. This is usually
                     the input or output prompt.
                     In general, the next mode depends on current mode and on the contents
                     of `line`.
                     """
                     # To reduce the number of regex match checks, we have multiple
                     # 'if' blocks instead of 'if-elif' blocks.
                     # Check for possible end of input
                     in2_match = self.in2_regex.match(line)
                     in2_match_rstrip = self.in2_regex_rstrip.match(line)
                     if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
                        in2_match_rstrip:
                         end_input = True
                     else:
                         end_input = False
                     if end_input and self.mode != 'tb':
                         # Only look for an end of input when not in tb mode.
                         # An ellipsis could appear within the traceback.
                         mode = 'output'
                         code = u''
                         insertion = (0, Generic.Prompt, line)
                         return mode, code, insertion
                     # Check for output prompt
                     out_match = self.out_regex.match(line)
                     out_match_rstrip = self.out_regex_rstrip.match(line)
                     if out_match or out_match_rstrip:
                         mode = 'output'
                         if out_match:
                             idx = out_match.end()
                         else:
                             idx = out_match_rstrip.end()
                         code = line[idx:]
                         # Use the 'heading' token for output.  We cannot use Generic.Error
                         # since it would conflict with exceptions.
                         insertion = (0, Generic.Heading, line[:idx])
                         return mode, code, insertion
                     # Check for input or continuation prompt (non stripped version)
                     in1_match = self.in1_regex.match(line)
                     if in1_match or (in2_match and self.mode != 'tb'):
                         # New input or when not in tb, continued input.
                         # We do not check for continued input when in tb since it is
                         # allowable to replace a long stack with an ellipsis.
                         mode = 'input'
                         if in1_match:
                             idx = in1_match.end()
                         else: # in2_match
                             idx = in2_match.end()
                         code = line[idx:]
                         insertion = (0, Generic.Prompt, line[:idx])
                         return mode, code, insertion
                     # Check for input or continuation prompt (stripped version)
                     in1_match_rstrip = self.in1_regex_rstrip.match(line)
                     if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
                         # New input or when not in tb, continued input.
                         # We do not check for continued input when in tb since it is
                         # allowable to replace a long stack with an ellipsis.
                         mode = 'input'
                         if in1_match_rstrip:
                             idx = in1_match_rstrip.end()
                         else: # in2_match
                             idx = in2_match_rstrip.end()
                         code = line[idx:]
                         insertion = (0, Generic.Prompt, line[:idx])
                         return mode, code, insertion
                     # Check for traceback
                     if self.ipytb_start.match(line):
                         mode = 'tb'
                         code = line
                         insertion = None
                         return mode, code, insertion
                     # All other stuff...
                     if self.mode in ('input', 'output'):
                         # We assume all other text is output. Multiline input that
                         # does not use the continuation marker cannot be detected.
                         # For example, the 3 in the following is clearly output:
                         #
                         #    In [1]: print 3
                         #    3
                         #
                         # But the following second line is part of the input:
                         #
                         #    In [2]: while True:
                         #        print True
                         #
                         # In both cases, the 2nd line will be 'output'.
                         #
                         mode = 'output'
                     else:
                         mode = 'tb'
                     code = line
                     insertion = None
                     return mode, code, insertion
                 def get_tokens_unprocessed(self, text):
                     """
                     Yield ``(index, token, value)`` tuples for a console session.

                     The text is walked line by line (using `line_re`, which only
                     matches newline-terminated lines).  Consecutive lines belonging
                     to the same mode are accumulated in a buffer; the buffer is lexed
                     and emitted whenever the mode changes, and once more at the end.
                     """
                     self.reset()
                     for found in line_re.finditer(text):
                         next_mode, code, insertion = self.get_mci(found.group())

                         if next_mode != self.mode:
                             # Emit what was collected for the previous mode before
                             # switching to the new one.
                             for tok in self.buffered_tokens():
                                 yield tok
                             self.mode = next_mode

                         if insertion:
                             # The prompt belongs at the current end of the buffer.
                             self.insertions.append((len(self.buffer), [insertion]))
                         self.buffer += code

                     # Flush whatever is left once all lines have been seen.
                     for tok in self.buffered_tokens():
                         yield tok
             class IPyLexer(Lexer):
                 """
                 Primary lexer for all IPython-like code.

                 This is a simple helper lexer.  If an input prompt of the form
                 ``In [N]:`` occurs anywhere in the text, then the entire text is
                 parsed with an IPython console lexer. If not, then the entire text
                 is parsed with a pure IPython lexer (the Python 3 variant when the
                 `python3` option is true, the Python 2 variant otherwise).

                 The goal is to reduce the number of lexers that are registered
                 with Pygments.
                 """
                 name = 'IPy session'
                 aliases = ['ipy']

                 def __init__(self, **options):
                     """Create the two lexers this class dispatches to.

                     Parameters
                     ----------
                     python3 : bool
                         If `True`, code is treated as Python 3, otherwise as Python 2.

                     All options are forwarded unchanged to both underlying lexers.
                     """
                     self.python3 = get_bool_opt(options, 'python3', False)
                     if self.python3:
                         self.aliases = ['ipy3']
                         pure_lexer = IPython3Lexer
                     else:
                         self.aliases = ['ipy2', 'ipy']
                         pure_lexer = IPythonLexer

                     Lexer.__init__(self, **options)

                     # Honour `python3` for prompt-less code as well, as the console
                     # and traceback lexers do.  The attribute keeps its historical
                     # name whichever variant it holds.
                     self.IPythonLexer = pure_lexer(**options)
                     self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

                 def get_tokens_unprocessed(self, text):
                     """Lex `text` with the console lexer if it contains an input
                     prompt, and with the pure IPython lexer otherwise."""
                     # Search for the input prompt anywhere...this allows code blocks to
                     # begin with comments as well.
                     if re.search(r'In \[[0-9]+\]:', text):
                         lex = self.IPythonConsoleLexer
                     else:
                         lex = self.IPythonLexer
                     for token in lex.get_tokens_unprocessed(text):
                         yield token

             """Test lexers module"""
             #-----------------------------------------------------------------------------
             #  Copyright (C) 2014 The IPython Development Team
             #
             #  Distributed under the terms of the BSD License.  The full license is in
             #  the file COPYING, distributed as part of this software.
             #-----------------------------------------------------------------------------
             #-----------------------------------------------------------------------------
             # Imports
             #-----------------------------------------------------------------------------
             from pygments.token import Token
             from IPython.nbconvert.tests.base import TestsBase
             from .. import lexers
             #-----------------------------------------------------------------------------
             # Classes and functions
             #-----------------------------------------------------------------------------
             class TestLexers(TestsBase):
                 """Collection of lexers tests"""
                 def setUp(self):
                     self.lexer = lexers.IPythonLexer()
                 def testIPythonLexer(self):
                     fragment = '!echo $HOME\n'
                     tokens = [
                         (Token.Operator, '!'),
                         (Token.Name.Builtin, 'echo'),
                         (Token.Text, ' '),
                         (Token.Name.Variable, '$HOME'),
                         (Token.Text, '\n'),
                     ]
                     self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
+                    fragment_2 = '!' + fragment
+                    tokens_2 = [
+                        (Token.Operator, '!!'),
+                    ] + tokens[1:]
+                    self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
                     fragment_2 = 'x = ' + fragment
                     tokens_2 = [
                         (Token.Name, 'x'),
                         (Token.Text, ' '),
                         (Token.Operator, '='),
                         (Token.Text, ' '),
                     ] + tokens
                     self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
                     fragment_2 = 'x, = ' + fragment
                     tokens_2 = [
                         (Token.Name, 'x'),
                         (Token.Punctuation, ','),
                         (Token.Text, ' '),
                         (Token.Operator, '='),
                         (Token.Text, ' '),
                     ] + tokens
                     self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
                     fragment_2 = 'x, = %sx ' + fragment[1:]
                     tokens_2 = [
                         (Token.Name, 'x'),
                         (Token.Punctuation, ','),
                         (Token.Text, ' '),
                         (Token.Operator, '='),
                         (Token.Text, ' '),
                         (Token.Operator, '%'),
                         (Token.Keyword, 'sx'),
                         (Token.Text, ' '),
                     ] + tokens[1:]
                     self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
                     fragment_2 = 'f = %R function () {}\n'
                     tokens_2 = [
                         (Token.Name, 'f'),
                         (Token.Text, ' '),
                         (Token.Operator, '='),
                         (Token.Text, ' '),
                         (Token.Operator, '%'),
                         (Token.Keyword, 'R'),
                         (Token.Text, ' function () {}\n'),
                     ]
                     self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
                     fragment_2 = '\t%%xyz\n$foo\n'
                     tokens_2 = [
                         (Token.Text, '\t'),
                         (Token.Operator, '%%'),
                         (Token.Keyword, 'xyz'),
                         (Token.Text, '\n$foo\n'),
                     ]
                     self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))