upstream/ipython Commit - r20148:09a21620

1

# -*- coding: utf-8 -*-

1

# -*- coding: utf-8 -*-

2

"""

2

"""

3

Defines a variety of Pygments lexers for highlighting IPython code.

3

Defines a variety of Pygments lexers for highlighting IPython code.

4

5

This includes:

5

This includes:

6

7

IPythonLexer, IPython3Lexer

7

IPythonLexer, IPython3Lexer

8

Lexers for pure IPython (python + magic/shell commands)

8

Lexers for pure IPython (python + magic/shell commands)

9

10

IPythonPartialTracebackLexer, IPythonTracebackLexer

10

IPythonPartialTracebackLexer, IPythonTracebackLexer

11

Supports 2.x and 3.x via keyword `python3`. The partial traceback

11

Supports 2.x and 3.x via keyword `python3`. The partial traceback

12

lexer reads everything but the Python code appearing in a traceback.

12

lexer reads everything but the Python code appearing in a traceback.

13

The full lexer combines the partial lexer with an IPython lexer.

13

The full lexer combines the partial lexer with an IPython lexer.

14

15

IPythonConsoleLexer

15

IPythonConsoleLexer

16

A lexer for IPython console sessions, with support for tracebacks.

16

A lexer for IPython console sessions, with support for tracebacks.

17

18

IPyLexer

18

IPyLexer

19

A friendly lexer which examines the first line of text and from it,

19

A friendly lexer which examines the first line of text and from it,

20

decides whether to use an IPython lexer or an IPython console lexer.

20

decides whether to use an IPython lexer or an IPython console lexer.

21

This is probably the only lexer that needs to be explicitly added

21

This is probably the only lexer that needs to be explicitly added

22

to Pygments.

22

to Pygments.

23

24

"""

24

"""

25

#-----------------------------------------------------------------------------

25

#-----------------------------------------------------------------------------

26

27

#

27

#

28

# Distributed under the terms of the Modified BSD License.

28

# Distributed under the terms of the Modified BSD License.

29

#

29

#

30

# The full license is in the file COPYING.txt, distributed with this software.

30

# The full license is in the file COPYING.txt, distributed with this software.

31

#-----------------------------------------------------------------------------

31

#-----------------------------------------------------------------------------

32

33

# Standard library

33

# Standard library

34

import re

34

import re

35

36

# Third party

36

# Third party

37

from pygments.lexers import BashLexer, PythonLexer, Python3Lexer

37

from pygments.lexers import BashLexer, PythonLexer, Python3Lexer

38

from pygments.lexer import (

38

from pygments.lexer import (

39

Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,

39

Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,

40

)

40

)

41

from pygments.token import (

41

from pygments.token import (

42

Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,

42

Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,

43

)

43

)

44

from pygments.util import get_bool_opt

44

from pygments.util import get_bool_opt

45

46

# Local

46

# Local

47

from IPython.testing.skipdoctest import skip_doctest

47

from IPython.testing.skipdoctest import skip_doctest

48

49

# Matches one line of text at a time, including its terminating newline.
line_re = re.compile('.*?\n')

# Pygments rules for IPython-only syntax (magics, shell escapes, help
# operators).  `build_ipy_lexer` places these ahead of the Python lexer's own
# 'root' rules, so the order of the entries below is significant: the first
# rule that matches wins.
ipython_tokens = [
    # Cell magic ``%%name``: the magic name is a keyword and everything that
    # follows (``(?s)`` lets ``.`` span newlines) is plain text.
    (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
    # ``%%!`` cell: remainder of the first line is text, the rest of the cell
    # is handed to the Bash lexer.
    (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))),
    # Help on a magic: ``%name?`` / ``%%name??``.
    (r"(%%?)(\w+)(\?\??)$", bygroups(Operator, Keyword, Operator)),
    # Trailing help operator after a word: ``obj?`` / ``obj??``.
    (r"\b(\?\??)(\s*)$", bygroups(Operator, Text)),
    # Line magics that run a shell command: the arguments are lexed as Bash.
    (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
                                            using(BashLexer), Text)),
    # Any other line magic: the arguments are left as plain text.
    (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
    # ``!!cmd`` at the start of a line.
    (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
    # ``!cmd`` shell escape.  The negative lookahead has to come *after* the
    # ``!``: written as ``((?!=)!)`` it is tested at the ``!`` itself, can
    # never fail, and the ``!=`` comparison operator is lexed as a shell
    # escape.
    (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
    # Leading help operator: ``?obj``, ``??obj``, ``?%magic``, ``?a.b*``.
    (r'^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)', bygroups(Text, Operator, Text)),
]

63

64

def build_ipy_lexer(python3):
    """Builds IPython lexers depending on the value of `python3`.

    The lexer inherits from an appropriate Python lexer and then adds
    information about IPython specific keywords (i.e. magic commands,
    shell commands, etc.)

    Parameters
    ----------
    python3 : bool
        If `True`, then build an IPython lexer from a Python 3 lexer.

    Returns
    -------
    lexer : type
        A new subclass of `Python3Lexer` (when `python3` is true) or of
        `PythonLexer`, whose 'root' state starts with `ipython_tokens`.

    """
    # It would be nice to have a single IPython lexer class which takes
    # a boolean `python3`.  But since there are two Python lexer classes,
    # we will also have two IPython lexer classes.
    if python3:
        PyLexer = Python3Lexer
        clsname = 'IPython3Lexer'
        name = 'IPython3'
        aliases = ['ipython3']
        doc = """IPython3 Lexer"""
    else:
        PyLexer = PythonLexer
        clsname = 'IPythonLexer'
        name = 'IPython'
        aliases = ['ipython2', 'ipython']
        doc = """IPython Lexer"""

    # Shallow-copy the parent's state table and build a *new* 'root' list by
    # concatenation, so the parent lexer's own rules are never mutated.
    tokens = PyLexer.tokens.copy()
    tokens['root'] = ipython_tokens + tokens['root']

    attrs = {'name': name, 'aliases': aliases, 'filenames': [],
             '__doc__': doc, 'tokens': tokens}

    # Use the class name, not the human-readable lexer name, so that
    # ``__name__`` agrees with the module-level names bound below.
    return type(clsname, (PyLexer,), attrs)


IPython3Lexer = build_ipy_lexer(python3=True)
IPythonLexer = build_ipy_lexer(python3=False)

104

105

106

class IPythonPartialTracebackLexer(RegexLexer):
    """
    Partial lexer for IPython tracebacks.

    Only the non-Python parts of a traceback are given specific tokens here;
    source-code fragments and unrecognised lines are tagged `Other`.  The
    same rules are used for both Python 2.x and 3.x tracebacks.

    """
    name = 'IPython Partial Traceback'

    tokens = {
        'root': [
            # The first line of a traceback is tagged Generic.Traceback.
            # Syntax-error tracebacks look different from ordinary ones, so
            # there is one opening rule for each style; in the syntax-error
            # style the filename is tagged like the filenames of ordinary
            # tracebacks (Name.Namespace).
            #
            # IPythonConsoleLexer uses the same two patterns to decide where
            # a traceback begins.

            # -- Ordinary traceback: a rule of hyphens, optionally after ^C.
            (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
            # -- Syntax-error traceback: the "File ..., line N" header.
            (r'^(  File)(.*)(, line )(\d+\n)',
             bygroups(Generic.Traceback, Name.Namespace,
                      Generic.Traceback, Literal.Number.Integer)),

            # Header line: (exception name)(whitespace)("Traceback ..." text)
            (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
             bygroups(Name.Exception, Generic.Whitespace, Text)),
            # Frame line: (module/filename)(" in ")(callee)(signature).
            # The token choices for callee and signature are open to
            # improvement.
            # NOTE(review): in `($.*$\n)` the `$` comes before `.*`, so this
            # group presumably captures only the newline -- confirm the
            # intended signature pattern.
            (r'(.*)( in )(.*)($.*$\n)',
             bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
            # Plain source line: (whitespace)(line number)(python code)
            (r'(\s*?)(\d+)(.*?\n)',
             bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
            # Highlighted source line: (arrow)(line number)(python code).
            # The arrow gets the Exception token so that its colour matches
            # the exception name.
            (r'(-*>?\s?)(\d+)(.*?\n)',
             bygroups(Name.Exception, Literal.Number.Integer, Other)),
            # Final line: (exception name)(": message")
            (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
             bygroups(Name.Exception, Text)),
            # Anything else is left as Other for a later lexer to handle.
            (r'.*\n', Other),
        ],
    }

153

154

155

class IPythonTracebackLexer(DelegatingLexer):
    """
    IPython traceback lexer.

    In doctests a traceback may be abbreviated freely, as long as the line
    that marks it as a traceback is kept: the row of hyphens for ordinary
    tracebacks, or the line naming the File and line number for syntax-error
    tracebacks.

    """
    # This is a DelegatingLexer: the text is first run through
    # IPythonPartialTracebackLexer, and whatever that lexer tags as `Other`
    # is then re-lexed by the "root" lexer -- IPython3Lexer or IPythonLexer,
    # chosen by the boolean option `python3`.
    name = 'IPython Traceback'
    aliases = ['ipythontb']

    def __init__(self, **options):
        """Set up the delegation for the requested Python flavour.

        Parameters
        ----------
        python3 : bool
            Read from `options` (default `False`).  Selects `IPython3Lexer`
            as the root lexer when true, `IPythonLexer` otherwise, and sets
            the instance's `aliases` to match.

        """
        use_py3 = get_bool_opt(options, 'python3', False)
        self.python3 = use_py3

        if use_py3:
            self.aliases = ['ipython3tb']
            root_lexer = IPython3Lexer
        else:
            self.aliases = ['ipython2tb', 'ipythontb']
            root_lexer = IPythonLexer

        DelegatingLexer.__init__(self, root_lexer,
                                 IPythonPartialTracebackLexer, **options)

188

189

@skip_doctest
class IPythonConsoleLexer(Lexer):
    """
    An IPython console lexer for IPython code-blocks and doctests, such as:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: a = 'foo'

            In [2]: a
            Out[2]: 'foo'

            In [3]: print a
            foo

            In [4]: 1 / 0


    Support is also provided for IPython exceptions:

    .. code-block:: rst

        .. code-block:: ipythonconsole

            In [1]: raise Exception

            ---------------------------------------------------------------------------
            Exception Traceback (most recent call last)
            <ipython-input-1-fca2ab0ca76b> in <module>()
            ----> 1 raise Exception

            Exception:

    """
    name = 'IPython console session'
    aliases = ['ipythonconsole']
    mimetypes = ['text/x-ipython-console']

    # The regexps used to determine what is input and what is output.
    # The default prompts for IPython are:
    #
    #     c.PromptManager.in_template  = 'In [\#]: '
    #     c.PromptManager.in2_template = ' .\D.: '
    #     c.PromptManager.out_template = 'Out[\#]: '
    #
    # These class attributes are pattern *strings*; __init__ replaces them on
    # the instance with compiled patterns.
    in1_regex = r'In \[[0-9]+\]: '
    # NOTE(review): the number of leading spaces here decides whether a
    # continuation prompt is recognised at the start of a line -- confirm it
    # matches the padding of the real continuation prompt.
    in2_regex = r' \.\.+\.: '
    out_regex = r'Out\[[0-9]+\]: '

    #: The regex to determine when a traceback starts.
    ipytb_start = re.compile(r'^(\^C)?(-+\n)|^(  File)(.*)(, line )(\d+\n)')

    def __init__(self, **options):
        """Initialize the IPython console lexer.

        Parameters
        ----------
        python3 : bool
            If `True`, then the console inputs are parsed using a Python 3
            lexer. Otherwise, they are parsed using a Python 2 lexer.
        in1_regex : str
            The regular expression (as a pattern string, not a compiled
            object) used to detect the start of inputs. Trailing whitespace
            in the pattern is significant: a second variant with it stripped
            is derived automatically. If `None` or not given, then the
            default input prompt is assumed.
        in2_regex : str
            The regular expression (pattern string) used to detect the
            continuation of inputs. Handled like `in1_regex`. If `None` or
            not given, then the default continuation prompt is assumed.
        out_regex : str
            The regular expression (pattern string) used to detect outputs.
            If `None` or not given, then the default output prompt is
            assumed.

        """
        self.python3 = get_bool_opt(options, 'python3', False)
        if self.python3:
            self.aliases = ['ipython3console']
        else:
            self.aliases = ['ipython2console', 'ipythonconsole']

        # Read the class-level default patterns from the class itself, and
        # honour the documented "None means default" contract.
        cls = type(self)

        def pattern(option, default):
            value = options.get(option)
            return default if value is None else value

        in1_regex = pattern('in1_regex', cls.in1_regex)
        in2_regex = pattern('in2_regex', cls.in2_regex)
        out_regex = pattern('out_regex', cls.out_regex)

        # So that we can work with input and output prompts which have been
        # rstrip'd (possibly by editors) we also need rstrip'd variants. If
        # we do not do this, then such prompts will be tagged as 'output'.
        # The reason we can't just use the rstrip'd variants instead is
        # because we want any whitespace associated with the prompt to be
        # inserted with the token. This allows formatted code to be modified
        # so as to hide the appearance of prompts, with the whitespace
        # included. One example use of this is in copybutton.js from the
        # standard lib Python docs.
        self.in1_regex = re.compile(in1_regex)
        self.in2_regex = re.compile(in2_regex)
        self.out_regex = re.compile(out_regex)
        self.in1_regex_rstrip = re.compile(in1_regex.rstrip() + '\n')
        self.in2_regex_rstrip = re.compile(in2_regex.rstrip() + '\n')
        self.out_regex_rstrip = re.compile(out_regex.rstrip() + '\n')

        Lexer.__init__(self, **options)

        # Input code is lexed by the IPython lexer for the chosen Python
        # flavour; the traceback lexer picks its own flavour from `options`.
        pylexer = IPython3Lexer if self.python3 else IPythonLexer
        self.pylexer = pylexer(**options)
        self.tblexer = IPythonTracebackLexer(**options)

        self.reset()

    def reset(self):
        """Return to the initial state: 'output' mode with nothing buffered."""
        self.mode = 'output'
        self.index = 0
        self.buffer = u''
        self.insertions = []

    def buffered_tokens(self):
        """
        Generator of unprocessed tokens after doing insertions and before
        changing to a new state.

        The buffer is lexed according to the current mode, the pending
        prompt insertions are merged in, and the buffer and insertion list
        are cleared once the generator has been exhausted.

        """
        if self.mode == 'output':
            tokens = [(0, Generic.Output, self.buffer)]
        elif self.mode == 'input':
            tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
        else:  # traceback
            tokens = self.tblexer.get_tokens_unprocessed(self.buffer)

        for i, t, v in do_insertions(self.insertions, tokens):
            # All token indexes are relative to the buffer.
            yield self.index + i, t, v

        # The positions produced by do_insertions count the inserted prompt
        # text as well as the buffer, so the running index has to advance by
        # both; otherwise the indexes of later tokens fall behind their true
        # position in the text.
        inserted = sum(len(value)
                       for _, itokens in self.insertions
                       for _, _, value in itokens)
        self.index += len(self.buffer) + inserted

        # Clear it all
        self.buffer = u''
        self.insertions = []

    def get_mci(self, line):
        """
        Parses the line and returns a 3-tuple: (mode, code, insertion).

        `mode` is the next mode (or state) of the lexer, and is always equal
        to 'input', 'output', or 'tb'.

        `code` is a portion of the line that should be added to the buffer
        corresponding to the next mode and eventually lexed by another lexer.
        For example, `code` could be Python code if `mode` were 'input'.

        `insertion` is a 3-tuple (index, token, text) representing an
        unprocessed "token" that will be inserted into the stream of tokens
        that are created from the buffer once we change modes. This is usually
        the input or output prompt. It is `None` when the line carries no
        prompt.

        In general, the next mode depends on current mode and on the contents
        of `line`.

        """
        # To reduce the number of regex match checks, each case returns as
        # soon as it is recognised; later patterns are only tried if the
        # earlier ones did not apply.
        in_tb = self.mode == 'tb'

        # Check for possible end of input: a continuation prompt with nothing
        # after it.
        in2_match = self.in2_regex.match(line)
        in2_match_rstrip = self.in2_regex_rstrip.match(line)
        end_input = bool(
            (in2_match and in2_match.group().rstrip() == line.rstrip())
            or in2_match_rstrip
        )
        if end_input and not in_tb:
            # Only look for an end of input when not in tb mode.
            # An ellipsis could appear within the traceback.
            return 'output', u'', (0, Generic.Prompt, line)

        # Check for output prompt (full form first, then rstrip'd form).
        out_match = (self.out_regex.match(line)
                     or self.out_regex_rstrip.match(line))
        if out_match:
            idx = out_match.end()
            # Use the 'heading' token for output. We cannot use Generic.Error
            # since it would conflict with exceptions.
            return 'output', line[idx:], (0, Generic.Heading, line[:idx])

        # Check for input or continuation prompt (non stripped version)
        in1_match = self.in1_regex.match(line)
        if in1_match or (in2_match and not in_tb):
            # New input or when not in tb, continued input.
            # We do not check for continued input when in tb since it is
            # allowable to replace a long stack with an ellipsis.
            idx = (in1_match or in2_match).end()
            return 'input', line[idx:], (0, Generic.Prompt, line[:idx])

        # Check for input or continuation prompt (stripped version)
        in1_match_rstrip = self.in1_regex_rstrip.match(line)
        if in1_match_rstrip or (in2_match_rstrip and not in_tb):
            # Same reasoning as for the non stripped prompts above.
            idx = (in1_match_rstrip or in2_match_rstrip).end()
            return 'input', line[idx:], (0, Generic.Prompt, line[:idx])

        # Check for traceback
        if self.ipytb_start.match(line):
            return 'tb', line, None

        # All other stuff...
        if self.mode in ('input', 'output'):
            # We assume all other text is output. Multiline input that
            # does not use the continuation marker cannot be detected.
            # For example, the 3 in the following is clearly output:
            #
            #     In [1]: print 3
            #     3
            #
            # But the following second line is part of the input:
            #
            #     In [2]: while True:
            #         print True
            #
            # In both cases, the 2nd line will be 'output'.
            #
            mode = 'output'
        else:
            mode = 'tb'

        return mode, line, None

    def get_tokens_unprocessed(self, text):
        """Yield (index, token, value) triples for a console session `text`.

        The text is processed one newline-terminated line at a time.  Lines
        are accumulated in a buffer while the mode stays the same, and the
        buffer is lexed and flushed whenever the mode changes and once more
        at the end of the text.

        """
        self.reset()
        for match in line_re.finditer(text):
            line = match.group()
            mode, code, insertion = self.get_mci(line)

            if mode != self.mode:
                # Yield buffered tokens before transitioning to new mode.
                for token in self.buffered_tokens():
                    yield token
                self.mode = mode

            if insertion:
                self.insertions.append((len(self.buffer), [insertion]))
            self.buffer += code
        else:
            # The loop has no `break`, so this always runs: flush whatever
            # is still buffered after the last line.
            for token in self.buffered_tokens():
                yield token

471

472

class IPyLexer(Lexer):

472

class IPyLexer(Lexer):

473

"""

473

"""

474

Primary lexer for all IPython-like code.

474

Primary lexer for all IPython-like code.

475

476

This is a simple helper lexer. If the first line of the text begins with

476

This is a simple helper lexer. If the first line of the text begins with

477

"In \[[0-9]+\]:", then the entire text is parsed with an IPython console

477

"In \[[0-9]+\]:", then the entire text is parsed with an IPython console

478

lexer. If not, then the entire text is parsed with an IPython lexer.

478

lexer. If not, then the entire text is parsed with an IPython lexer.

479

480

The goal is to reduce the number of lexers that are registered

480

The goal is to reduce the number of lexers that are registered

481

with Pygments.

481

with Pygments.

482

483

"""

483

"""

484

name = 'IPy session'

484

name = 'IPy session'

485

aliases = ['ipy']

485

aliases = ['ipy']

486

487

def __init__(self, **options):

487

def __init__(self, **options):

488

self.python3 = get_bool_opt(options, 'python3', False)

488

self.python3 = get_bool_opt(options, 'python3', False)

489

if self.python3:

489

if self.python3:

490

self.aliases = ['ipy3']

490

self.aliases = ['ipy3']

491

else:

491

else:

492

self.aliases = ['ipy2', 'ipy']

492

self.aliases = ['ipy2', 'ipy']

493

494

Lexer.__init__(self, **options)

494

Lexer.__init__(self, **options)

495

496

self.IPythonLexer = IPythonLexer(**options)

496

self.IPythonLexer = IPythonLexer(**options)

497

self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

497

self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

498

499

def get_tokens_unprocessed(self, text):

499

def get_tokens_unprocessed(self, text):

500

# Search for the input prompt anywhere...this allows code blocks to

500

# Search for the input prompt anywhere...this allows code blocks to

501

# begin with comments as well.

501

# begin with comments as well.

502

if re.match(r'.*(In \[[0-9]+\]:)', text.strip(), re.DOTALL):

502

if re.match(r'.*(In \[[0-9]+\]:)', text.strip(), re.DOTALL):

503

lex = self.IPythonConsoleLexer

503

lex = self.IPythonConsoleLexer

504

else:

504

else:

505

lex = self.IPythonLexer

505

lex = self.IPythonLexer

506

for token in lex.get_tokens_unprocessed(text):

506

for token in lex.get_tokens_unprocessed(text):

507

yield token

507

yield token

508

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # -*- coding: utf-8 -*-
             """
             Defines a variety of Pygments lexers for highlighting IPython code.
             This includes:
                 IPythonLexer, IPython3Lexer
                     Lexers for pure IPython (python + magic/shell commands)
                 IPythonPartialTracebackLexer, IPythonTracebackLexer
                     Supports 2.x and 3.x via keyword `python3`.  The partial traceback
                     lexer reads everything but the Python code appearing in a traceback.
                     The full lexer combines the partial lexer with an IPython lexer.
                 IPythonConsoleLexer
                     A lexer for IPython console sessions, with support for tracebacks.
                 IPyLexer
                     A friendly lexer which examines the first line of text and from it,
                     decides whether to use an IPython lexer or an IPython console lexer.
                     This is probably the only lexer that needs to be explicitly added
                     to Pygments.
             """
             #-----------------------------------------------------------------------------
             # Copyright (c) 2013, the IPython Development Team.
             #
             # Distributed under the terms of the Modified BSD License.
             #
             # The full license is in the file COPYING.txt, distributed with this software.
             #-----------------------------------------------------------------------------
             # Standard library
             import re
             # Third party
             from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
             from pygments.lexer import (
                 Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
             )
             from pygments.token import (
                 Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
             )
             from pygments.util import get_bool_opt
             # Local
             from IPython.testing.skipdoctest import skip_doctest
             # Matches one physical line at a time, including its trailing newline.
             line_re = re.compile('.*?\n')
             # IPython-specific rules.  build_ipy_lexer() places them ahead of the stock
             # Python 'root' rules, so they are tried first.
             ipython_tokens = [
               # Cell magic (%%name): everything after the magic name is plain text.
               (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
               # Shell cell (%%!): rest of the first line is text, the remaining text
               # is handed to the Bash lexer.
               (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))),
               # Help on a magic: %name? / %%name??
               (r"(%%?)(\w+)(\?\??)$",  bygroups(Operator, Keyword, Operator)),
               # Trailing help marker after a word: obj? / obj??
               (r"\b(\?\??)(\s*)$",  bygroups(Operator, Text)),
               # Shell-capturing line magics: their arguments are lexed as Bash.
               (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
                                                    using(BashLexer), Text)),
               # Any other line magic: arguments are left as plain text.
               (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
               # Double-bang shell escape at the start of a line.
               (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
               # Single-bang shell escape.  The negative lookahead has to come *after*
               # the bang: placed before it, it inspects the '!' itself, never
               # rejects anything, and 'x != y' would be lexed as a shell command.
               (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
               # Leading help marker: ?obj / ??obj / ?%magic
               (r'^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)', bygroups(Text, Operator, Text)),
             ]
             def build_ipy_lexer(python3):
                 """Builds IPython lexers depending on the value of `python3`.

                 The lexer inherits from an appropriate Python lexer and then adds
                 information about IPython specific keywords (i.e. magic commands,
                 shell commands, etc.)

                 Parameters
                 ----------
                 python3 : bool
                     If `True`, then build an IPython lexer from a Python 3 lexer.

                 Returns
                 -------
                 type
                     A new lexer class derived from `Python3Lexer` or `PythonLexer`
                     whose 'root' state starts with `ipython_tokens`.
                 """
                 # It would be nice to have a single IPython lexer class which takes
                 # a boolean `python3`.  But since there are two Python lexer classes,
                 # we will also have two IPython lexer classes.
                 if python3:
                     PyLexer = Python3Lexer
                     clsname = 'IPython3Lexer'
                     name = 'IPython3'
                     aliases = ['ipython3']
                     doc = """IPython3 Lexer"""
                 else:
                     PyLexer = PythonLexer
                     clsname = 'IPythonLexer'
                     name = 'IPython'
                     aliases = ['ipython2', 'ipython']
                     doc = """IPython Lexer"""

                 # Copy the state table so the base lexer's own rules are not modified;
                 # the IPython rules go first so that they take precedence.
                 tokens = PyLexer.tokens.copy()
                 tokens['root'] = ipython_tokens + tokens['root']

                 attrs = {'name': name, 'aliases': aliases, 'filenames': [],
                          '__doc__': doc, 'tokens': tokens}

                 # Create the class under `clsname` so that its __name__ matches the
                 # module-level name it is bound to; `name` remains the display name
                 # stored in the class attributes.
                 return type(clsname, (PyLexer,), attrs)
             IPython3Lexer = build_ipy_lexer(python3=True)
             IPythonLexer = build_ipy_lexer(python3=False)
             class IPythonPartialTracebackLexer(RegexLexer):
                 """
                 Partial lexer for IPython tracebacks.

                 Handles all the non-python output. This works for both Python 2.x and 3.x.
                 Source-code portions of a traceback, and any line that no rule
                 recognizes, are tagged with the `Other` token rather than being lexed
                 here.
                 """
                 name = 'IPython Partial Traceback'
                 # A single 'root' state; the rules below are listed from the most
                 # specific pattern to the catch-all at the end.
                 tokens = {
                     'root': [
                         # Tracebacks for syntax errors have a different style.
                         # For both types of tracebacks, we mark the first line with
                         # Generic.Traceback.  For syntax errors, we mark the filename
                         # as we mark the filenames for non-syntax tracebacks.
                         #
                         # These two regexps define how IPythonConsoleLexer finds a
                         # traceback.
                         #
                         ## Non-syntax traceback: a line of hyphens, optionally
                         ## preceded by a literal "^C".
                         (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
                         ## Syntax traceback: '  File <name>, line <number>'
                         (r'^(  File)(.*)(, line )(\d+\n)',
                          bygroups(Generic.Traceback, Name.Namespace,
                                   Generic.Traceback, Literal.Number.Integer)),
                         # (Exception Identifier)(Whitespace)(Traceback Message)
                         (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
                          bygroups(Name.Exception, Generic.Whitespace, Text)),
                         # (Module/Filename)(Text)(Callee)(Function Signature)
                         # Better options for callee and function signature?
                         (r'(.*)( in )(.*)(\(.*\)\n)',
                          bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
                         # Regular line: (Whitespace)(Line Number)(Python Code)
                         # The code part is tagged Other.
                         (r'(\s*?)(\d+)(.*?\n)',
                          bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
                         # Emphasized line: (Arrow)(Line Number)(Python Code)
                         # Using Exception token so arrow color matches the Exception.
                         (r'(-*>?\s?)(\d+)(.*?\n)',
                          bygroups(Name.Exception, Literal.Number.Integer, Other)),
                         # (Exception Identifier)(Message)
                         (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
                          bygroups(Name.Exception, Text)),
                         # Tag everything else as Other, will be handled later.
                         (r'.*\n', Other),
                     ],
                 }
             class IPythonTracebackLexer(DelegatingLexer):
                 """
                 IPython traceback lexer.

                 For doctests, the tracebacks can be snipped as much as desired with the
                 exception to the lines that designate a traceback. For non-syntax error
                 tracebacks, this is the line of hyphens. For syntax error tracebacks,
                 this is the line which lists the File and line number.
                 """
                 # This is a DelegatingLexer: the text is first run through the partial
                 # IPython traceback lexer, and whatever that lexer marks with the
                 # "Other" token is then passed to the "root" lexer.  The root lexer is
                 # the IPython lexer selected by the boolean option `python3`.
                 name = 'IPython Traceback'
                 aliases = ['ipythontb']

                 def __init__(self, **options):
                     """Select the root lexer and aliases from the `python3` option."""
                     self.python3 = get_bool_opt(options, 'python3', False)
                     if self.python3:
                         self.aliases = ['ipython3tb']
                         root_lexer = IPython3Lexer
                     else:
                         self.aliases = ['ipython2tb', 'ipythontb']
                         root_lexer = IPythonLexer
                     DelegatingLexer.__init__(self, root_lexer,
                                              IPythonPartialTracebackLexer, **options)
             @skip_doctest
             class IPythonConsoleLexer(Lexer):
                 """
                 An IPython console lexer for IPython code-blocks and doctests, such as:

                 .. code-block:: rst

                     .. code-block:: ipythonconsole

                         In [1]: a = 'foo'

                         In [2]: a
                         Out[2]: 'foo'

                         In [3]: print a
                         foo

                         In [4]: 1 / 0

                 Support is also provided for IPython exceptions:

                 .. code-block:: rst

                     .. code-block:: ipythonconsole

                         In [1]: raise Exception
                         ---------------------------------------------------------------------------
                         Exception                                 Traceback (most recent call last)
                         <ipython-input-1-fca2ab0ca76b> in <module>()
                         ----> 1 raise Exception

                         Exception:

                 """
                 name = 'IPython console session'
                 aliases = ['ipythonconsole']
                 mimetypes = ['text/x-ipython-console']

                 # The regexps used to determine what is input and what is output.
                 # The default prompts for IPython are:
                 #
                 #     c.PromptManager.in_template  = 'In [\#]: '
                 #     c.PromptManager.in2_template = '   .\D.: '
                 #     c.PromptManager.out_template = 'Out[\#]: '
                 #
                 in1_regex = r'In \[[0-9]+\]: '
                 in2_regex = r'   \.\.+\.: '
                 out_regex = r'Out\[[0-9]+\]: '

                 #: The regex to determine when a traceback starts.
                 ipytb_start = re.compile(r'^(\^C)?(-+\n)|^(  File)(.*)(, line )(\d+\n)')

                 def __init__(self, **options):
                     """Initialize the IPython console lexer.

                     Parameters
                     ----------
                     python3 : bool
                         If `True`, then the console inputs are parsed using a Python 3
                         lexer. Otherwise, they are parsed using a Python 2 lexer.
                     in1_regex : str
                         The regular expression pattern (a string, not a compiled
                         object) used to detect the start of inputs. If omitted or
                         `None`, then the default input prompt is assumed.
                     in2_regex : str
                         The regular expression pattern (a string, not a compiled
                         object) used to detect the continuation of inputs. If omitted
                         or `None`, then the default continuation prompt is assumed.
                     out_regex : str
                         The regular expression pattern (a string, not a compiled
                         object) used to detect outputs. If omitted or `None`, then
                         the default output prompt is assumed.

                     """
                     self.python3 = get_bool_opt(options, 'python3', False)
                     if self.python3:
                         self.aliases = ['ipython3console']
                     else:
                         self.aliases = ['ipython2console', 'ipythonconsole']

                     # So that we can work with input and output prompts which have been
                     # rstrip'd (possibly by editors) we also need rstrip'd variants. If
                     # we do not do this, then such prompts will be tagged as 'output'.
                     # The reason we can't just use the rstrip'd variants instead is
                     # because we want any whitespace associated with the prompt to be
                     # inserted with the token. This allows formatted code to be modified
                     # so as to hide the appearance of prompts, with the whitespace
                     # included. One example use of this is in copybutton.js from the
                     # standard lib Python docs.
                     patterns = {}
                     for key in ('in1_regex', 'in2_regex', 'out_regex'):
                         pattern = options.get(key)
                         if pattern is None:
                             # Fall back to the class-level default pattern string.
                             pattern = getattr(self, key)
                         patterns[key] = pattern
                         patterns[key + '_rstrip'] = pattern.rstrip() + '\n'

                     # Compile and save them all.  This is done only after every
                     # default has been read, because the compiled objects replace the
                     # pattern strings under the same attribute names.
                     for attr, pattern in patterns.items():
                         setattr(self, attr, re.compile(pattern))

                     Lexer.__init__(self, **options)

                     if self.python3:
                         pylexer = IPython3Lexer
                     else:
                         pylexer = IPythonLexer
                     # The traceback lexer reads the `python3` option itself.
                     self.pylexer = pylexer(**options)
                     self.tblexer = IPythonTracebackLexer(**options)

                     self.reset()

                 def reset(self):
                     """Return to the initial state: 'output' mode with empty buffers."""
                     self.mode = 'output'
                     self.index = 0
                     self.buffer = u''
                     self.insertions = []

                 def buffered_tokens(self):
                     """
                     Generator of unprocessed tokens after doing insertions and before
                     changing to a new state.

                     The buffer is lexed according to the current mode, the pending
                     prompt insertions are merged in, and the buffer is then cleared.
                     """
                     if self.mode == 'output':
                         tokens = [(0, Generic.Output, self.buffer)]
                     elif self.mode == 'input':
                         tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
                     else: # traceback
                         tokens = self.tblexer.get_tokens_unprocessed(self.buffer)

                     for i, t, v in do_insertions(self.insertions, tokens):
                         # All token indexes are relative to the buffer.
                         yield self.index + i, t, v

                     # Clear it all
                     self.index += len(self.buffer)
                     self.buffer = u''
                     self.insertions = []

                 def get_mci(self, line):
                     """
                     Parses the line and returns a 3-tuple: (mode, code, insertion).

                     `mode` is the next mode (or state) of the lexer, and is always equal
                     to 'input', 'output', or 'tb'.

                     `code` is a portion of the line that should be added to the buffer
                     corresponding to the next mode and eventually lexed by another lexer.
                     For example, `code` could be Python code if `mode` were 'input'.

                     `insertion` is a 3-tuple (index, token, text) representing an
                     unprocessed "token" that will be inserted into the stream of tokens
                     that are created from the buffer once we change modes. This is usually
                     the input or output prompt.  It is `None` when the line carries no
                     prompt.

                     In general, the next mode depends on current mode and on the contents
                     of `line`.

                     """
                     # To reduce the number of regex match checks, we have multiple
                     # 'if' blocks instead of 'if-elif' blocks.

                     # Check for possible end of input: a continuation prompt with
                     # nothing after it.
                     in2_match = self.in2_regex.match(line)
                     in2_match_rstrip = self.in2_regex_rstrip.match(line)
                     end_input = bool(
                         (in2_match and in2_match.group().rstrip() == line.rstrip())
                         or in2_match_rstrip
                     )
                     if end_input and self.mode != 'tb':
                         # Only look for an end of input when not in tb mode.
                         # An ellipsis could appear within the traceback.
                         mode = 'output'
                         code = u''
                         insertion = (0, Generic.Prompt, line)
                         return mode, code, insertion

                     # Check for output prompt
                     out_match = self.out_regex.match(line)
                     out_match_rstrip = self.out_regex_rstrip.match(line)
                     if out_match or out_match_rstrip:
                         mode = 'output'
                         if out_match:
                             idx = out_match.end()
                         else:
                             idx = out_match_rstrip.end()
                         code = line[idx:]
                         # Use the 'heading' token for output.  We cannot use Generic.Error
                         # since it would conflict with exceptions.
                         insertion = (0, Generic.Heading, line[:idx])
                         return mode, code, insertion

                     # Check for input or continuation prompt (non stripped version)
                     in1_match = self.in1_regex.match(line)
                     if in1_match or (in2_match and self.mode != 'tb'):
                         # New input or when not in tb, continued input.
                         # We do not check for continued input when in tb since it is
                         # allowable to replace a long stack with an ellipsis.
                         mode = 'input'
                         if in1_match:
                             idx = in1_match.end()
                         else: # in2_match
                             idx = in2_match.end()
                         code = line[idx:]
                         insertion = (0, Generic.Prompt, line[:idx])
                         return mode, code, insertion

                     # Check for input or continuation prompt (stripped version)
                     in1_match_rstrip = self.in1_regex_rstrip.match(line)
                     if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
                         # New input or when not in tb, continued input.
                         # We do not check for continued input when in tb since it is
                         # allowable to replace a long stack with an ellipsis.
                         mode = 'input'
                         if in1_match_rstrip:
                             idx = in1_match_rstrip.end()
                         else: # in2_match
                             idx = in2_match_rstrip.end()
                         code = line[idx:]
                         insertion = (0, Generic.Prompt, line[:idx])
                         return mode, code, insertion

                     # Check for traceback
                     if self.ipytb_start.match(line):
                         mode = 'tb'
                         code = line
                         insertion = None
                         return mode, code, insertion

                     # All other stuff...
                     if self.mode in ('input', 'output'):
                         # We assume all other text is output. Multiline input that
                         # does not use the continuation marker cannot be detected.
                         # For example, the 3 in the following is clearly output:
                         #
                         #    In [1]: print 3
                         #    3
                         #
                         # But the following second line is part of the input:
                         #
                         #    In [2]: while True:
                         #        print True
                         #
                         # In both cases, the 2nd line will be 'output'.
                         #
                         mode = 'output'
                     else:
                         mode = 'tb'

                     code = line
                     insertion = None

                     return mode, code, insertion

                 def get_tokens_unprocessed(self, text):
                     """
                     Lex `text` line by line, yielding (index, token, value) tuples.

                     Consecutive lines that share a mode are accumulated in a buffer;
                     the buffer is lexed and emitted whenever the mode changes and once
                     more after the last line.
                     """
                     self.reset()
                     for match in line_re.finditer(text):
                         line = match.group()
                         mode, code, insertion = self.get_mci(line)

                         if mode != self.mode:
                             # Yield buffered tokens before transitioning to new mode.
                             for token in self.buffered_tokens():
                                 yield token
                             self.mode = mode

                         if insertion:
                             self.insertions.append((len(self.buffer), [insertion]))
                         self.buffer += code

                     # Flush whatever is still buffered for the final mode.
                     for token in self.buffered_tokens():
                         yield token
             class IPyLexer(Lexer):
                 r"""
                 Primary lexer for all IPython-like code.

                 This is a simple helper lexer.  If the text contains an input prompt
                 matching "In \[[0-9]+\]:" anywhere, then the entire text is parsed with
                 an IPython console lexer. If not, then the entire text is parsed with
                 an IPython lexer.

                 The goal is to reduce the number of lexers that are registered
                 with Pygments.

                 """
                 name = 'IPy session'
                 aliases = ['ipy']

                 def __init__(self, **options):
                     """Create the two delegate lexers.

                     Parameters
                     ----------
                     python3 : bool
                         If `True`, both delegates lex code as Python 3; otherwise as
                         Python 2.  All options are forwarded to the delegates.
                     """
                     self.python3 = get_bool_opt(options, 'python3', False)
                     if self.python3:
                         self.aliases = ['ipy3']
                     else:
                         self.aliases = ['ipy2', 'ipy']

                     Lexer.__init__(self, **options)

                     # Honor `python3` for plain code as well, so that both delegates
                     # agree on the Python flavour.
                     if self.python3:
                         pylexer = IPython3Lexer
                     else:
                         pylexer = IPythonLexer
                     self.IPythonLexer = pylexer(**options)
                     self.IPythonConsoleLexer = IPythonConsoleLexer(**options)

                 def get_tokens_unprocessed(self, text):
                     """Delegate to the console lexer if an input prompt is present."""
                     # Search for the input prompt anywhere...this allows code blocks to
                     # begin with comments as well.
                     if re.match(r'.*(In \[[0-9]+\]:)', text.strip(), re.DOTALL):
                         lex = self.IPythonConsoleLexer
                     else:
                         lex = self.IPythonLexer
                     for token in lex.get_tokens_unprocessed(text):
                         yield token

             """Test lexers module"""
             #-----------------------------------------------------------------------------
             #  Copyright (C) 2014 The IPython Development Team
             #
             #  Distributed under the terms of the BSD License.  The full license is in
             #  the file COPYING, distributed as part of this software.
             #-----------------------------------------------------------------------------
             #-----------------------------------------------------------------------------
             # Imports
             #-----------------------------------------------------------------------------
             from pygments.token import Token
             from IPython.nbconvert.tests.base import TestsBase
             from .. import lexers
             #-----------------------------------------------------------------------------
             # Classes and functions
             #-----------------------------------------------------------------------------
             class TestLexers(TestsBase):
                 """Collection of lexers tests"""
                 def setUp(self):
                     self.lexer = lexers.IPythonLexer()

                 def testIPythonLexer(self):
                     """Check the token streams produced for IPython-specific syntax."""
                     # Single-bang shell escape: the command is lexed as Bash.
                     fragment = '!echo $HOME\n'
                     tokens = [
                         (Token.Operator, '!'),
                         (Token.Name.Builtin, 'echo'),
                         (Token.Text, ' '),
                         (Token.Name.Variable, '$HOME'),
                         (Token.Text, '\n'),
                     ]
                     self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))

                     # Double-bang shell escape.
                     fragment_2 = '!' + fragment
                     tokens_2 = [
                         (Token.Operator, '!!'),
                     ] + tokens[1:]
                     self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

                     # Shell cell magic preceded by whitespace.
                     fragment_2 = '\t %%!\n' + fragment[1:]
                     tokens_2 = [
                         (Token.Text, '\t '),
                         (Token.Operator, '%%!'),
                         (Token.Text, '\n'),
                     ] + tokens[1:]
                     self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

                     # Shell escape on the right-hand side of an assignment.
                     fragment_2 = 'x = ' + fragment
                     tokens_2 = [
                         (Token.Name, 'x'),
                         (Token.Text, ' '),
                         (Token.Operator, '='),
                         (Token.Text, ' '),
                     ] + tokens
                     self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

                     # Same, with a tuple-unpacking target.
                     fragment_2 = 'x, = ' + fragment
                     tokens_2 = [
                         (Token.Name, 'x'),
                         (Token.Punctuation, ','),
                         (Token.Text, ' '),
                         (Token.Operator, '='),
                         (Token.Text, ' '),
                     ] + tokens
                     self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

                     # %sx line magic: its arguments are lexed as Bash.
                     fragment_2 = 'x, = %sx ' + fragment[1:]
                     tokens_2 = [
                         (Token.Name, 'x'),
                         (Token.Punctuation, ','),
                         (Token.Text, ' '),
                         (Token.Operator, '='),
                         (Token.Text, ' '),
                         (Token.Operator, '%'),
                         (Token.Keyword, 'sx'),
                         (Token.Text, ' '),
                     ] + tokens[1:]
                     self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

                     # Generic line magic: its arguments stay plain text.
                     fragment_2 = 'f = %R function () {}\n'
                     tokens_2 = [
                         (Token.Name, 'f'),
                         (Token.Text, ' '),
                         (Token.Operator, '='),
                         (Token.Text, ' '),
                         (Token.Operator, '%'),
                         (Token.Keyword, 'R'),
                         (Token.Text, ' function () {}\n'),
                     ]
                     self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

                     # Generic cell magic: the whole cell body stays plain text.
                     fragment_2 = '\t%%xyz\n$foo\n'
                     tokens_2 = [
                         (Token.Text, '\t'),
                         (Token.Operator, '%%'),
                         (Token.Keyword, 'xyz'),
                         (Token.Text, '\n$foo\n'),
                     ]
                     self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

                     # Help request on a magic.
                     fragment_2 = '%system?\n'
                     tokens_2 = [
                         (Token.Operator, '%'),
                         (Token.Keyword, 'system'),
                         (Token.Operator, '?'),
                         (Token.Text, '\n'),
                     ]
                     self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

                     # '!=' is the Python inequality operator, not a shell escape.
                     fragment_2 = 'x != y\n'
                     tokens_2 = [
                         (Token.Name, 'x'),
                         (Token.Text, ' '),
                         (Token.Operator, '!='),
                         (Token.Text, ' '),
                         (Token.Name, 'y'),
                         (Token.Text, '\n'),
                     ]
                     self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))

                     # Leading help marker preceded by whitespace.
                     fragment_2 = ' ?math.sin\n'
                     tokens_2 = [
                         (Token.Text, ' '),
                         (Token.Operator, '?'),
                         (Token.Text, 'math.sin'),
                         (Token.Text, '\n'),
                     ]
                     self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))