support 'foo*??' in lexer...
Min RK
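For context: the rule this commit adds handles IPython's wildcard help syntax, where a trailing '?' or '??' after a (possibly wildcarded) name requests introspection, e.g. '*int*??'. A quick sketch of the added pattern in isolation (the regex is copied from the diff below; the harness around it is illustrative only):

    import re

    # The rule added to ipython_tokens in this commit, tried standalone.
    pattern = re.compile(r'(\s*%{0,2}[\w\.\*]*)(\?\??)(\s*)$')

    m = pattern.match(' *int*??')
    print(m.groups())  # (' *int*', '??', '') -> the Text, Operator, Text groups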
@@ -1,510 +1,511 @@
1 1 # -*- coding: utf-8 -*-
2 2 """
3 3 Defines a variety of Pygments lexers for highlighting IPython code.
4 4
5 5 This includes:
6 6
7 7 IPythonLexer, IPython3Lexer
8 8 Lexers for pure IPython (python + magic/shell commands)
9 9
10 10 IPythonPartialTracebackLexer, IPythonTracebackLexer
11 11 Supports 2.x and 3.x via keyword `python3`. The partial traceback
12 12 lexer reads everything but the Python code appearing in a traceback.
13 13 The full lexer combines the partial lexer with an IPython lexer.
14 14
15 15 IPythonConsoleLexer
16 16 A lexer for IPython console sessions, with support for tracebacks.
17 17
18 18 IPyLexer
19 19 A friendly lexer which examines the first line of text and, from it,
20 20 decides whether to use an IPython lexer or an IPython console lexer.
21 21 This is probably the only lexer that needs to be explicitly added
22 22 to Pygments.
23 23
24 24 """
25 25 #-----------------------------------------------------------------------------
26 26 # Copyright (c) 2013, the IPython Development Team.
27 27 #
28 28 # Distributed under the terms of the Modified BSD License.
29 29 #
30 30 # The full license is in the file COPYING.txt, distributed with this software.
31 31 #-----------------------------------------------------------------------------
32 32
33 33 # Standard library
34 34 import re
35 35
36 36 # Third party
37 37 from pygments.lexers import BashLexer, PythonLexer, Python3Lexer
38 38 from pygments.lexer import (
39 39 Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
40 40 )
41 41 from pygments.token import (
42 42 Comment, Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
43 43 )
44 44 from pygments.util import get_bool_opt
45 45
46 46 # Local
47 47
48 48 line_re = re.compile('.*?\n')
49 49
50 50 __all__ = ['build_ipy_lexer', 'IPython3Lexer', 'IPythonLexer',
51 51 'IPythonPartialTracebackLexer', 'IPythonTracebackLexer',
52 52 'IPythonConsoleLexer', 'IPyLexer']
53 53
54 54 ipython_tokens = [
55 55 (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
56 56 (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))),
57 57 (r"(%%?)(\w+)(\?\??)$", bygroups(Operator, Keyword, Operator)),
58 58 (r"\b(\?\??)(\s*)$", bygroups(Operator, Text)),
59 59 (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
60 60 using(BashLexer), Text)),
61 61 (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
62 62 (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
63 63 (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
64 64 (r'^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)', bygroups(Text, Operator, Text)),
65 (r'(\s*%{0,2}[\w\.\*]*)(\?\??)(\s*)$', bygroups(Text, Operator, Text)),
65 66 ]
66 67
67 68 def build_ipy_lexer(python3):
68 69 """Builds IPython lexers depending on the value of `python3`.
69 70
70 71 The lexer inherits from an appropriate Python lexer and then adds
71 72 information about IPython specific keywords (i.e. magic commands,
72 73 shell commands, etc.)
73 74
74 75 Parameters
75 76 ----------
76 77 python3 : bool
77 78 If `True`, then build an IPython lexer from a Python 3 lexer.
78 79
79 80 """
80 81 # It would be nice to have a single IPython lexer class which takes
81 82 # a boolean `python3`. But since there are two Python lexer classes,
82 83 # we will also have two IPython lexer classes.
83 84 if python3:
84 85 PyLexer = Python3Lexer
85 86 clsname = 'IPython3Lexer'
86 87 name = 'IPython3'
87 88 aliases = ['ipython3']
88 89 doc = """IPython3 Lexer"""
89 90 else:
90 91 PyLexer = PythonLexer
91 92 clsname = 'IPythonLexer'
92 93 name = 'IPython'
93 94 aliases = ['ipython2', 'ipython']
94 95 doc = """IPython Lexer"""
95 96
96 97 tokens = PyLexer.tokens.copy()
97 98 tokens['root'] = ipython_tokens + tokens['root']
98 99
99 100 attrs = {'name': name, 'aliases': aliases, 'filenames': [],
100 101 '__doc__': doc, 'tokens': tokens}
101 102
102 103 return type(name, (PyLexer,), attrs)
103 104
104 105
105 106 IPython3Lexer = build_ipy_lexer(python3=True)
106 107 IPythonLexer = build_ipy_lexer(python3=False)
107 108
108 109
109 110 class IPythonPartialTracebackLexer(RegexLexer):
110 111 """
111 112 Partial lexer for IPython tracebacks.
112 113
113 114 Handles all the non-python output. This works for both Python 2.x and 3.x.
114 115
115 116 """
116 117 name = 'IPython Partial Traceback'
117 118
118 119 tokens = {
119 120 'root': [
120 121 # Tracebacks for syntax errors have a different style.
121 122 # For both types of tracebacks, we mark the first line with
122 123 # Generic.Traceback. For syntax errors, we mark the filename
123 124 # in the same way as we mark filenames in non-syntax tracebacks.
124 125 #
125 126 # These two regexps define how IPythonConsoleLexer finds a
126 127 # traceback.
127 128 #
128 129 ## Non-syntax traceback
129 130 (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
130 131 ## Syntax traceback
131 132 (r'^( File)(.*)(, line )(\d+\n)',
132 133 bygroups(Generic.Traceback, Name.Namespace,
133 134 Generic.Traceback, Literal.Number.Integer)),
134 135
135 136 # (Exception Identifier)(Whitespace)(Traceback Message)
136 137 (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
137 138 bygroups(Name.Exception, Generic.Whitespace, Text)),
138 139 # (Module/Filename)(Text)(Callee)(Function Signature)
139 140 # Better options for callee and function signature?
140 141 (r'(.*)( in )(.*)(\(.*\)\n)',
141 142 bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
142 143 # Regular line: (Whitespace)(Line Number)(Python Code)
143 144 (r'(\s*?)(\d+)(.*?\n)',
144 145 bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
145 146 # Emphasized line: (Arrow)(Line Number)(Python Code)
146 147 # Using Exception token so arrow color matches the Exception.
147 148 (r'(-*>?\s?)(\d+)(.*?\n)',
148 149 bygroups(Name.Exception, Literal.Number.Integer, Other)),
149 150 # (Exception Identifier)(Message)
150 151 (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
151 152 bygroups(Name.Exception, Text)),
152 153 # Tag everything else as Other, will be handled later.
153 154 (r'.*\n', Other),
154 155 ],
155 156 }
156 157
157 158
158 159 class IPythonTracebackLexer(DelegatingLexer):
159 160 """
160 161 IPython traceback lexer.
161 162
162 163 For doctests, the tracebacks can be snipped as much as desired, with the
163 164 exception of the lines that designate a traceback. For non-syntax error
164 165 tracebacks, this is the line of hyphens. For syntax error tracebacks,
165 166 this is the line which lists the File and line number.
166 167
167 168 """
168 169 # The lexer inherits from DelegatingLexer. The "root" lexer is an
169 170 # appropriate IPython lexer, which depends on the value of the boolean
170 171 # `python3`. First, we parse with the partial IPython traceback lexer.
171 172 # Then, any code marked with the "Other" token is delegated to the root
172 173 # lexer.
173 174 #
174 175 name = 'IPython Traceback'
175 176 aliases = ['ipythontb']
176 177
177 178 def __init__(self, **options):
178 179 self.python3 = get_bool_opt(options, 'python3', False)
179 180 if self.python3:
180 181 self.aliases = ['ipython3tb']
181 182 else:
182 183 self.aliases = ['ipython2tb', 'ipythontb']
183 184
184 185 if self.python3:
185 186 IPyLexer = IPython3Lexer
186 187 else:
187 188 IPyLexer = IPythonLexer
188 189
189 190 DelegatingLexer.__init__(self, IPyLexer,
190 191 IPythonPartialTracebackLexer, **options)
191 192
192 193 class IPythonConsoleLexer(Lexer):
193 194 """
194 195 An IPython console lexer for IPython code-blocks and doctests, such as:
195 196
196 197 .. code-block:: rst
197 198
198 199 .. code-block:: ipythonconsole
199 200
200 201 In [1]: a = 'foo'
201 202
202 203 In [2]: a
203 204 Out[2]: 'foo'
204 205
205 206 In [3]: print a
206 207 foo
207 208
208 209 In [4]: 1 / 0
209 210
210 211
211 212 Support is also provided for IPython exceptions:
212 213
213 214 .. code-block:: rst
214 215
215 216 .. code-block:: ipythonconsole
216 217
217 218 In [1]: raise Exception
218 219
219 220 ---------------------------------------------------------------------------
220 221 Exception Traceback (most recent call last)
221 222 <ipython-input-1-fca2ab0ca76b> in <module>()
222 223 ----> 1 raise Exception
223 224
224 225 Exception:
225 226
226 227 """
227 228 name = 'IPython console session'
228 229 aliases = ['ipythonconsole']
229 230 mimetypes = ['text/x-ipython-console']
230 231
231 232 # The regexps used to determine what is input and what is output.
232 233 # The default prompts for IPython are:
233 234 #
234 235 # c.PromptManager.in_template  = 'In [\#]: '
235 236 # c.PromptManager.in2_template = '   .\D.: '
236 237 # c.PromptManager.out_template = 'Out[\#]: '
237 238 #
238 239 in1_regex = r'In \[[0-9]+\]: '
239 240 in2_regex = r'   \.\.+\.: '
240 241 out_regex = r'Out\[[0-9]+\]: '
241 242
242 243 #: The regex to determine when a traceback starts.
243 244 ipytb_start = re.compile(r'^(\^C)?(-+\n)|^( File)(.*)(, line )(\d+\n)')
244 245
245 246 def __init__(self, **options):
246 247 """Initialize the IPython console lexer.
247 248
248 249 Parameters
249 250 ----------
250 251 python3 : bool
251 252 If `True`, then the console inputs are parsed using a Python 3
252 253 lexer. Otherwise, they are parsed using a Python 2 lexer.
253 254 in1_regex : RegexObject
254 255 The compiled regular expression used to detect the start
255 256 of inputs. Although the IPython configuration setting may have a
256 257 trailing whitespace, do not include it in the regex. If `None`,
257 258 then the default input prompt is assumed.
258 259 in2_regex : RegexObject
259 260 The compiled regular expression used to detect the continuation
260 261 of inputs. Although the IPython configuration setting may have a
261 262 trailing whitespace, do not include it in the regex. If `None`,
262 263 then the default input prompt is assumed.
263 264 out_regex : RegexObject
264 265 The compiled regular expression used to detect outputs. If `None`,
265 266 then the default output prompt is assumed.
266 267
267 268 """
268 269 self.python3 = get_bool_opt(options, 'python3', False)
269 270 if self.python3:
270 271 self.aliases = ['ipython3console']
271 272 else:
272 273 self.aliases = ['ipython2console', 'ipythonconsole']
273 274
274 275 in1_regex = options.get('in1_regex', self.in1_regex)
275 276 in2_regex = options.get('in2_regex', self.in2_regex)
276 277 out_regex = options.get('out_regex', self.out_regex)
277 278
278 279 # So that we can work with input and output prompts which have been
279 280 # rstrip'd (possibly by editors) we also need rstrip'd variants. If
280 281 # we do not do this, then such prompts will be tagged as 'output'.
281 282 # The reason we can't just use the rstrip'd variants instead is that
282 283 # we want any whitespace associated with the prompt to be inserted
283 284 # with the token. This allows formatted code to be modified so as to
284 285 # hide the appearance of prompts, with the whitespace included. One
285 286 # example use of this is in copybutton.js from the standard Python docs.
286 287 in1_regex_rstrip = in1_regex.rstrip() + '\n'
287 288 in2_regex_rstrip = in2_regex.rstrip() + '\n'
288 289 out_regex_rstrip = out_regex.rstrip() + '\n'
289 290
290 291 # Compile and save them all.
291 292 attrs = ['in1_regex', 'in2_regex', 'out_regex',
292 293 'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
293 294 for attr in attrs:
294 295 self.__setattr__(attr, re.compile(locals()[attr]))
295 296
296 297 Lexer.__init__(self, **options)
297 298
298 299 if self.python3:
299 300 pylexer = IPython3Lexer
300 301 tblexer = IPythonTracebackLexer
301 302 else:
302 303 pylexer = IPythonLexer
303 304 tblexer = IPythonTracebackLexer
304 305
305 306 self.pylexer = pylexer(**options)
306 307 self.tblexer = tblexer(**options)
307 308
308 309 self.reset()
309 310
310 311 def reset(self):
311 312 self.mode = 'output'
312 313 self.index = 0
313 314 self.buffer = u''
314 315 self.insertions = []
315 316
316 317 def buffered_tokens(self):
317 318 """
318 319 Generator of unprocessed tokens after doing insertions and before
319 320 changing to a new state.
320 321
321 322 """
322 323 if self.mode == 'output':
323 324 tokens = [(0, Generic.Output, self.buffer)]
324 325 elif self.mode == 'input':
325 326 tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
326 327 else: # traceback
327 328 tokens = self.tblexer.get_tokens_unprocessed(self.buffer)
328 329
329 330 for i, t, v in do_insertions(self.insertions, tokens):
330 331 # All token indexes are relative to the buffer.
331 332 yield self.index + i, t, v
332 333
333 334 # Clear it all
334 335 self.index += len(self.buffer)
335 336 self.buffer = u''
336 337 self.insertions = []
337 338
338 339 def get_mci(self, line):
339 340 """
340 341 Parses the line and returns a 3-tuple: (mode, code, insertion).
341 342
342 343 `mode` is the next mode (or state) of the lexer, and is always equal
343 344 to 'input', 'output', or 'tb'.
344 345
345 346 `code` is a portion of the line that should be added to the buffer
346 347 corresponding to the next mode and eventually lexed by another lexer.
347 348 For example, `code` could be Python code if `mode` were 'input'.
348 349
349 350 `insertion` is a 3-tuple (index, token, text) representing an
350 351 unprocessed "token" that will be inserted into the stream of tokens
351 352 that are created from the buffer once we change modes. This is usually
352 353 the input or output prompt.
353 354
354 355 In general, the next mode depends on current mode and on the contents
355 356 of `line`.
356 357
357 358 """
358 359 # To reduce the number of regex match checks, we have multiple
359 360 # 'if' blocks instead of 'if-elif' blocks.
360 361
361 362 # Check for possible end of input
362 363 in2_match = self.in2_regex.match(line)
363 364 in2_match_rstrip = self.in2_regex_rstrip.match(line)
364 365 if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
365 366 in2_match_rstrip:
366 367 end_input = True
367 368 else:
368 369 end_input = False
369 370 if end_input and self.mode != 'tb':
370 371 # Only look for an end of input when not in tb mode.
371 372 # An ellipsis could appear within the traceback.
372 373 mode = 'output'
373 374 code = u''
374 375 insertion = (0, Generic.Prompt, line)
375 376 return mode, code, insertion
376 377
377 378 # Check for output prompt
378 379 out_match = self.out_regex.match(line)
379 380 out_match_rstrip = self.out_regex_rstrip.match(line)
380 381 if out_match or out_match_rstrip:
381 382 mode = 'output'
382 383 if out_match:
383 384 idx = out_match.end()
384 385 else:
385 386 idx = out_match_rstrip.end()
386 387 code = line[idx:]
387 388 # Use the 'heading' token for output. We cannot use Generic.Error
388 389 # since it would conflict with exceptions.
389 390 insertion = (0, Generic.Heading, line[:idx])
390 391 return mode, code, insertion
391 392
392 393
393 394 # Check for input or continuation prompt (non stripped version)
394 395 in1_match = self.in1_regex.match(line)
395 396 if in1_match or (in2_match and self.mode != 'tb'):
396 397 # New input or when not in tb, continued input.
397 398 # We do not check for continued input when in tb since it is
398 399 # allowable to replace a long stack with an ellipsis.
399 400 mode = 'input'
400 401 if in1_match:
401 402 idx = in1_match.end()
402 403 else: # in2_match
403 404 idx = in2_match.end()
404 405 code = line[idx:]
405 406 insertion = (0, Generic.Prompt, line[:idx])
406 407 return mode, code, insertion
407 408
408 409 # Check for input or continuation prompt (stripped version)
409 410 in1_match_rstrip = self.in1_regex_rstrip.match(line)
410 411 if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
411 412 # New input or when not in tb, continued input.
412 413 # We do not check for continued input when in tb since it is
413 414 # allowable to replace a long stack with an ellipsis.
414 415 mode = 'input'
415 416 if in1_match_rstrip:
416 417 idx = in1_match_rstrip.end()
417 418 else: # in2_match
418 419 idx = in2_match_rstrip.end()
419 420 code = line[idx:]
420 421 insertion = (0, Generic.Prompt, line[:idx])
421 422 return mode, code, insertion
422 423
423 424 # Check for traceback
424 425 if self.ipytb_start.match(line):
425 426 mode = 'tb'
426 427 code = line
427 428 insertion = None
428 429 return mode, code, insertion
429 430
430 431 # All other stuff...
431 432 if self.mode in ('input', 'output'):
432 433 # We assume all other text is output. Multiline input that
433 434 # does not use the continuation marker cannot be detected.
434 435 # For example, the 3 in the following is clearly output:
435 436 #
436 437 # In [1]: print 3
437 438 # 3
438 439 #
439 440 # But the following second line is part of the input:
440 441 #
441 442 # In [2]: while True:
442 443 # print True
443 444 #
444 445 # In both cases, the 2nd line will be 'output'.
445 446 #
446 447 mode = 'output'
447 448 else:
448 449 mode = 'tb'
449 450
450 451 code = line
451 452 insertion = None
452 453
453 454 return mode, code, insertion
454 455
455 456 def get_tokens_unprocessed(self, text):
456 457 self.reset()
457 458 for match in line_re.finditer(text):
458 459 line = match.group()
459 460 mode, code, insertion = self.get_mci(line)
460 461
461 462 if mode != self.mode:
462 463 # Yield buffered tokens before transitioning to new mode.
463 464 for token in self.buffered_tokens():
464 465 yield token
465 466 self.mode = mode
466 467
467 468 if insertion:
468 469 self.insertions.append((len(self.buffer), [insertion]))
469 470 self.buffer += code
470 471 else:
471 472 for token in self.buffered_tokens():
472 473 yield token
473 474
474 475 class IPyLexer(Lexer):
475 476 """
476 477 Primary lexer for all IPython-like code.
477 478
478 479 This is a simple helper lexer. If the first line of the text begins with
479 480 "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
480 481 lexer. If not, then the entire text is parsed with an IPython lexer.
481 482
482 483 The goal is to reduce the number of lexers that are registered
483 484 with Pygments.
484 485
485 486 """
486 487 name = 'IPy session'
487 488 aliases = ['ipy']
488 489
489 490 def __init__(self, **options):
490 491 self.python3 = get_bool_opt(options, 'python3', False)
491 492 if self.python3:
492 493 self.aliases = ['ipy3']
493 494 else:
494 495 self.aliases = ['ipy2', 'ipy']
495 496
496 497 Lexer.__init__(self, **options)
497 498
498 499 self.IPythonLexer = IPythonLexer(**options)
499 500 self.IPythonConsoleLexer = IPythonConsoleLexer(**options)
500 501
501 502 def get_tokens_unprocessed(self, text):
502 503 # Search for the input prompt anywhere...this allows code blocks to
503 504 # begin with comments as well.
504 505 if re.match(r'.*(In \[[0-9]+\]:)', text.strip(), re.DOTALL):
505 506 lex = self.IPythonConsoleLexer
506 507 else:
507 508 lex = self.IPythonLexer
508 509 for token in lex.get_tokens_unprocessed(text):
509 510 yield token
510 511
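A quick sanity check of the new trailing-help rule, mirroring the test added in the next hunk. This is a sketch: it assumes the module is importable as IPython.lib.lexers, which is not shown in this diff.

    from IPython.lib.lexers import IPythonLexer  # assumed import path

    lexer = IPythonLexer()
    for token_type, value in lexer.get_tokens(' *int*?\n'):
        print(token_type, repr(value))
    # Expected, per the new test below:
    # Token.Text ' *int*'
    # Token.Operator '?'
    # Token.Text '\n'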
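The console lexer's prompt regexes are also configurable, per the __init__ docstring above. A hedged sketch (same assumed import path; note that __init__ calls re.compile on whatever it receives, so plain pattern strings are what the options expect):

    from pygments import highlight
    from pygments.formatters import TerminalFormatter
    from IPython.lib.lexers import IPythonConsoleLexer  # assumed import path

    # Treat doctest-style prompts as the input/continuation prompts.
    lexer = IPythonConsoleLexer(in1_regex=r'>>> ', in2_regex=r'\.\.\. ',
                                out_regex=r'Out: ')
    print(highlight(">>> a = 'foo'\n>>> a\nOut: 'foo'\n",
                    lexer, TerminalFormatter()))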
@@ -1,122 +1,130 @@
1 1 """Test lexers module"""
2 2
3 3 # Copyright (c) IPython Development Team.
4 4 # Distributed under the terms of the Modified BSD License.
5 5
6 6 from unittest import TestCase
7 7 from pygments.token import Token
8 8
9 9 from .. import lexers
10 10
11 11
12 12 class TestLexers(TestCase):
13 13 """Collection of lexers tests"""
14 14 def setUp(self):
15 15 self.lexer = lexers.IPythonLexer()
16 16
17 17 def testIPythonLexer(self):
18 18 fragment = '!echo $HOME\n'
19 19 tokens = [
20 20 (Token.Operator, '!'),
21 21 (Token.Name.Builtin, 'echo'),
22 22 (Token.Text, ' '),
23 23 (Token.Name.Variable, '$HOME'),
24 24 (Token.Text, '\n'),
25 25 ]
26 26 self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
27 27
28 28 fragment_2 = '!' + fragment
29 29 tokens_2 = [
30 30 (Token.Operator, '!!'),
31 31 ] + tokens[1:]
32 32 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
33 33
34 34 fragment_2 = '\t %%!\n' + fragment[1:]
35 35 tokens_2 = [
36 36 (Token.Text, '\t '),
37 37 (Token.Operator, '%%!'),
38 38 (Token.Text, '\n'),
39 39 ] + tokens[1:]
40 40 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
41 41
42 42 fragment_2 = 'x = ' + fragment
43 43 tokens_2 = [
44 44 (Token.Name, 'x'),
45 45 (Token.Text, ' '),
46 46 (Token.Operator, '='),
47 47 (Token.Text, ' '),
48 48 ] + tokens
49 49 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
50 50
51 51 fragment_2 = 'x, = ' + fragment
52 52 tokens_2 = [
53 53 (Token.Name, 'x'),
54 54 (Token.Punctuation, ','),
55 55 (Token.Text, ' '),
56 56 (Token.Operator, '='),
57 57 (Token.Text, ' '),
58 58 ] + tokens
59 59 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
60 60
61 61 fragment_2 = 'x, = %sx ' + fragment[1:]
62 62 tokens_2 = [
63 63 (Token.Name, 'x'),
64 64 (Token.Punctuation, ','),
65 65 (Token.Text, ' '),
66 66 (Token.Operator, '='),
67 67 (Token.Text, ' '),
68 68 (Token.Operator, '%'),
69 69 (Token.Keyword, 'sx'),
70 70 (Token.Text, ' '),
71 71 ] + tokens[1:]
72 72 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
73 73
74 74 fragment_2 = 'f = %R function () {}\n'
75 75 tokens_2 = [
76 76 (Token.Name, 'f'),
77 77 (Token.Text, ' '),
78 78 (Token.Operator, '='),
79 79 (Token.Text, ' '),
80 80 (Token.Operator, '%'),
81 81 (Token.Keyword, 'R'),
82 82 (Token.Text, ' function () {}\n'),
83 83 ]
84 84 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
85 85
86 86 fragment_2 = '\t%%xyz\n$foo\n'
87 87 tokens_2 = [
88 88 (Token.Text, '\t'),
89 89 (Token.Operator, '%%'),
90 90 (Token.Keyword, 'xyz'),
91 91 (Token.Text, '\n$foo\n'),
92 92 ]
93 93 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
94 94
95 95 fragment_2 = '%system?\n'
96 96 tokens_2 = [
97 97 (Token.Operator, '%'),
98 98 (Token.Keyword, 'system'),
99 99 (Token.Operator, '?'),
100 100 (Token.Text, '\n'),
101 101 ]
102 102 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
103 103
104 104 fragment_2 = 'x != y\n'
105 105 tokens_2 = [
106 106 (Token.Name, 'x'),
107 107 (Token.Text, ' '),
108 108 (Token.Operator, '!='),
109 109 (Token.Text, ' '),
110 110 (Token.Name, 'y'),
111 111 (Token.Text, '\n'),
112 112 ]
113 113 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
114 114
115 115 fragment_2 = ' ?math.sin\n'
116 116 tokens_2 = [
117 117 (Token.Text, ' '),
118 118 (Token.Operator, '?'),
119 119 (Token.Text, 'math.sin'),
120 120 (Token.Text, '\n'),
121 121 ]
122 122 self.assertEqual(tokens_2, list(self.lexer.get_tokens(fragment_2)))
123
124 fragment = ' *int*?\n'
125 tokens = [
126 (Token.Text, ' *int*'),
127 (Token.Operator, '?'),
128 (Token.Text, '\n'),
129 ]
130 self.assertEqual(tokens, list(self.lexer.get_tokens(fragment)))
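Beyond the unit tests, the lexers are exercised through Pygments' normal API. A minimal usage sketch (same assumed import path; TerminalFormatter is just one possible formatter):

    from pygments import highlight
    from pygments.formatters import TerminalFormatter
    from IPython.lib.lexers import IPyLexer  # assumed import path

    # IPyLexer picks a backend by inspecting the text: an 'In [N]:' prompt
    # anywhere selects the console lexer, otherwise the plain IPython lexer.
    session = "In [1]: a = 'foo'\n\nIn [2]: a\nOut[2]: 'foo'\n"
    print(highlight(session, IPyLexer(), TerminalFormatter()))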