##// END OF EJS Templates
remove unused pyparsing
Julian Taylor -
Show More
@@ -115,8 +115,7 b' class PrefilterManager(Configurable):'
115 The transformers are instances of :class:`PrefilterTransformer` and have
115 The transformers are instances of :class:`PrefilterTransformer` and have
116 a single method :meth:`transform` that takes a line and returns a
116 a single method :meth:`transform` that takes a line and returns a
117 transformed line. The transformation can be accomplished using any
117 transformed line. The transformation can be accomplished using any
118 tool, but our current ones use regular expressions for speed. We also
118 tool, but our current ones use regular expressions for speed.
119 ship :mod:`pyparsing` in :mod:`IPython.external` for use in transformers.
120
119
121 After all the transformers have been run, the line is fed to the checkers,
120 After all the transformers have been run, the line is fed to the checkers,
122 which are instances of :class:`PrefilterChecker`. The line is passed to
121 which are instances of :class:`PrefilterChecker`. The line is passed to
@@ -1,4 +0,0 b''
1 try:
2 from pyparsing import *
3 except ImportError:
4 from _pyparsing import *
This diff has been collapsed as it changes many lines, (3708 lines changed) Show them Hide them
@@ -1,3708 +0,0 b''
1 # -*- coding: utf-8 -*-
2 # module pyparsing.py
3 #
4 # Copyright (c) 2003-2009 Paul T. McGuire
5 #
6 # Permission is hereby granted, free of charge, to any person obtaining
7 # a copy of this software and associated documentation files (the
8 # "Software"), to deal in the Software without restriction, including
9 # without limitation the rights to use, copy, modify, merge, publish,
10 # distribute, sublicense, and/or sell copies of the Software, and to
11 # permit persons to whom the Software is furnished to do so, subject to
12 # the following conditions:
13 #
14 # The above copyright notice and this permission notice shall be
15 # included in all copies or substantial portions of the Software.
16 #
17 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
21 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 #
25 #from __future__ import generators
26
27 __doc__ = \
28 """
29 pyparsing module - Classes and methods to define and execute parsing grammars
30
31 The pyparsing module is an alternative approach to creating and executing simple grammars,
32 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
33 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
34 provides a library of classes that you use to construct the grammar directly in Python.
35
36 Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
37
38 from pyparsing import Word, alphas
39
40 # define grammar of a greeting
41 greet = Word( alphas ) + "," + Word( alphas ) + "!"
42
43 hello = "Hello, World!"
44 print hello, "->", greet.parseString( hello )
45
46 The program outputs the following::
47
48 Hello, World! -> ['Hello', ',', 'World', '!']
49
50 The Python representation of the grammar is quite readable, owing to the self-explanatory
51 class names, and the use of '+', '|' and '^' operators.
52
53 The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
54 object with named attributes.
55
56 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
57 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
58 - quoted strings
59 - embedded comments
60 """
61
62 __version__ = "1.5.2"
63 __versionTime__ = "17 February 2009 19:45"
64 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
65
66 import string
67 from weakref import ref as wkref
68 import copy
69 import sys
70 import warnings
71 import re
72 import sre_constants
73 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
74
75 __all__ = [
76 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
77 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
78 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
79 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
80 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
81 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
82 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
83 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
84 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
85 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums',
86 'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
87 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
88 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
89 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
90 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
91 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
92 'indentedBlock', 'originalTextFor',
93 ]
94
95
96 """
97 Detect if we are running version 3.X and make appropriate changes
98 Robert A. Clark
99 """
100 if sys.version_info[0] > 2:
101 _PY3K = True
102 _MAX_INT = sys.maxsize
103 basestring = str
104 else:
105 _PY3K = False
106 _MAX_INT = sys.maxint
107
108 if not _PY3K:
109 def _ustr(obj):
110 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
111 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
112 then < returns the unicode object | encodes it with the default encoding | ... >.
113 """
114 if isinstance(obj,unicode):
115 return obj
116
117 try:
118 # If this works, then _ustr(obj) has the same behaviour as str(obj), so
119 # it won't break any existing code.
120 return str(obj)
121
122 except UnicodeEncodeError:
123 # The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
124 # state that "The return value must be a string object". However, does a
125 # unicode object (being a subclass of basestring) count as a "string
126 # object"?
127 # If so, then return a unicode object:
128 return unicode(obj)
129 # Else encode it... but how? There are many choices... :)
130 # Replace unprintables with escape codes?
131 #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
132 # Replace unprintables with question marks?
133 #return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
134 # ...
135 else:
136 _ustr = str
137 unichr = chr
138
139 if not _PY3K:
140 def _str2dict(strg):
141 return dict( [(c,0) for c in strg] )
142 else:
143 _str2dict = set
144
145 def _xml_escape(data):
146 """Escape &, <, >, ", ', etc. in a string of data."""
147
148 # ampersand must be replaced first
149 from_symbols = '&><"\''
150 to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()]
151 for from_,to_ in zip(from_symbols, to_symbols):
152 data = data.replace(from_, to_)
153 return data
154
155 class _Constants(object):
156 pass
157
# Character-set constants used to build grammars.
if _PY3K:
    alphas = string.ascii_lowercase + string.ascii_uppercase
else:
    alphas = string.lowercase + string.uppercase
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = chr(92)  # a single backslash character
# every printable character except whitespace
printables = "".join([c for c in string.printable
                      if c not in string.whitespace])
167
class ParseBaseException(Exception):
    """Base exception class for all parsing runtime exceptions.

    Attributes set by the constructor:
     - loc - character offset at which the failure was reported
     - msg - human-readable description of the failure
     - pstr - the string being parsed ("" when only a message was supplied)
     - parserElement - the element passed as elem, if any

    Attributes computed on access from loc and pstr:
     - lineno - line number of the exception text
     - col / column - column number of the exception text
     - line - the line containing the exception text
    """
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(self, pstr, loc=0, msg=None, elem=None):
        self.loc = loc
        if msg is None:
            # single-argument form: the only string given is the message
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        self.parserElement = elem

    def __getattr__(self, aname):
        """Compute lineno, col/column and line lazily; any other missing
        attribute raises AttributeError."""
        if aname == "lineno":
            return lineno(self.loc, self.pstr)
        elif aname in ("col", "column"):
            return col(self.loc, self.pstr)
        elif aname == "line":
            return line(self.loc, self.pstr)
        else:
            raise AttributeError(aname)

    def __str__(self):
        return "%s (at char %d), (line:%d, col:%d)" % \
                (self.msg, self.loc, self.lineno, self.column)

    def __repr__(self):
        return _ustr(self)

    def markInputline(self, markerString=">!<"):
        """Extract the exception line from the input string and mark the
        location of the exception with markerString.  An empty
        markerString returns the stripped line unmarked.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = "".join([line_str[:line_column],
                                markerString, line_str[line_column:]])
        return line_str.strip()

    def __dir__(self):
        # The names listed here must match real attributes: the method is
        # spelled markInputline, and column is accepted as an alias of col.
        return "loc msg pstr parserElement lineno col column line " \
               "markInputline __str__ __repr__".split()
215
class ParseException(ParseBaseException):
    """Exception thrown when a parse expression does not match.

    Adds nothing to ParseBaseException; the attributes available by name are:
     - lineno - the line number of the exception text
     - col - the column number of the exception text
     - line - the line containing the exception text
    """
224
class ParseFatalException(ParseBaseException):
    """User-throwable exception, to be raised when inconsistent parse
    content is found; stops all parsing immediately."""
229
class ParseSyntaxException(ParseFatalException):
    """Same as ParseFatalException, but raised internally when an ErrorStop
    signals that parsing must stop immediately because a syntax error that
    cannot be backtracked over has been found."""
    def __init__(self, pe):
        # Re-package the originating exception's details unchanged.
        details = (pe.pstr, pe.loc, pe.msg, pe.parserElement)
        super(ParseSyntaxException, self).__init__(*details)
237
238 #~ class ReparseException(ParseBaseException):
239 #~ """Experimental class - parse actions can raise this exception to cause
240 #~ pyparsing to reparse the input string:
241 #~ - with a modified input string, and/or
242 #~ - with a modified start location
243 #~ Set the values of the ReparseException in the constructor, and raise the
244 #~ exception in a parse action to cause pyparsing to use the new string/location.
245 #~ Setting the values as None causes no change to be made.
246 #~ """
247 #~ def __init_( self, newstring, restartLoc ):
248 #~ self.newParseText = newstring
249 #~ self.reparseLoc = restartLoc
250
class RecursiveGrammarException(Exception):
    """Exception thrown by validate() if the grammar could be improperly recursive.

    parseElementList is stored unchanged as parseElementTrace and shown in
    the string form of the exception.
    """
    def __init__(self, parseElementList):
        self.parseElementTrace = parseElementList

    def __str__(self):
        # Wrap the trace in a 1-tuple: a bare tuple on the right of % would
        # be unpacked as the format arguments and fail for any length != 1.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
258
259 class _ParseResultsWithOffset(object):
260 def __init__(self,p1,p2):
261 self.tup = (p1,p2)
262 def __getitem__(self,i):
263 return self.tup[i]
264 def __repr__(self):
265 return repr(self.tup)
266 def setOffset(self,i):
267 self.tup = (self.tup[0],i)
268
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (len(results))
       - by list index (results[0], results[1], etc.)
       - by attribute (results.<resultsName>)

    Internally the tokens live in a list, and named results live in a dict
    mapping each name to a list of _ParseResultsWithOffset entries.
    """
    __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )

    def __new__(cls, toklist, name=None, asList=True, modal=True):
        # Wrapping an existing instance returns that same instance; its
        # __doinit flag is already False, so __init__ will not reset it.
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(self, toklist, name=None, asList=True, modal=True):
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name:
            if not modal:
                # non-modal names accumulate every match instead of the last one
                self.__accumNames[name] = 0
            if isinstance(name, int):
                name = _ustr(name)  # will always return a str, but use _ustr for consistency
            self.__name = name
            if toklist not in (None, '', []):
                if isinstance(toklist, basestring):
                    toklist = [toklist]
                if asList:
                    if isinstance(toklist, ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(), 0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]), 0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError, TypeError, IndexError):
                        self[name] = toklist

    def __getitem__(self, i):
        """Index by int/slice into the token list, or by name into the named
        results (last match, or all matches for accumulating names)."""
        if isinstance(i, (int, slice)):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([v[0] for v in self.__tokdict[i]])

    def __setitem__(self, k, v):
        if isinstance(v, _ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k, list()) + [v]
            sub = v[0]
        elif isinstance(k, int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k, list()) + [_ParseResultsWithOffset(v, 0)]
            sub = v
        if isinstance(sub, ParseResults):
            # weak reference, so a child does not keep its parent alive
            sub.__parent = wkref(self)

    def __delitem__(self, i):
        if isinstance(i, (int, slice)):
            mylen = len(self.__toklist)
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i + 1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name in self.__tokdict:
                occurrences = self.__tokdict[name]
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__(self, k):
        return k in self.__tokdict

    def __len__(self):
        return len(self.__toklist)

    def __bool__(self):
        return len(self.__toklist) > 0
    __nonzero__ = __bool__

    def __iter__(self):
        return iter(self.__toklist)

    def __reversed__(self):
        return iter(reversed(self.__toklist))

    def keys(self):
        """Returns all named result keys."""
        return self.__tokdict.keys()

    def pop(self, index=-1):
        """Removes and returns item at specified index (default=last).
           Will work with either numeric indices or dict-key indices."""
        ret = self[index]
        del self[index]
        return ret

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given defaultValue or None if no
           defaultValue is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert(self, index, insStr):
        """Inserts insStr into the token list at index and shifts the
           recorded positions of named results located after index."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name in self.__tokdict:
            occurrences = self.__tokdict[name]
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def items(self):
        """Returns all named result keys and values as a list of tuples."""
        return [(k, self[k]) for k in self.__tokdict]

    def values(self):
        """Returns all named result values (the last match for each name)."""
        # dict.values() exists on both Python 2 and 3; itervalues() does not
        return [v[-1][0] for v in self.__tokdict.values()]

    def __getattr__(self, name):
        """Named results are readable as attributes; an unknown name gives ""."""
        if name not in self.__slots__:
            if name in self.__tokdict:
                if name not in self.__accumNames:
                    return self.__tokdict[name][-1][0]
                else:
                    return ParseResults([v[0] for v in self.__tokdict[name]])
            else:
                return ""
        return None

    def __add__(self, other):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__(self, other):
        if other.__tokdict:
            offset = len(self.__toklist)

            def addoffset(a):
                # A negative position is pinned to the join point.  Written
                # as an explicit branch because the and/or idiom returned the
                # negative value itself whenever offset was 0.
                if a < 0:
                    return offset
                return a + offset

            otherdictitems = [(k, _ParseResultsWithOffset(v[0], addoffset(v[1])))
                              for (k, vlist) in other.__tokdict.items() for v in vlist]
            for k, v in otherdictitems:
                self[k] = v
                if isinstance(v[0], ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update(other.__accumNames)
        del other
        return self

    def __repr__(self):
        return "(%s, %s)" % (repr(self.__toklist), repr(self.__tokdict))

    def __str__(self):
        parts = []
        for i in self.__toklist:
            if isinstance(i, ParseResults):
                parts.append(_ustr(i))
            else:
                parts.append(repr(i))
        return "[" + ", ".join(parts) + "]"

    def _asStringList(self, sep=''):
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance(item, ParseResults):
                out += item._asStringList()
            else:
                out.append(_ustr(item))
        return out

    def asList(self):
        """Returns the parse results as a nested list of matching tokens.
           Nested ParseResults become nested lists; other tokens are
           included as they are."""
        out = []
        for res in self.__toklist:
            if isinstance(res, ParseResults):
                out.append(res.asList())
            else:
                out.append(res)
        return out

    def asDict(self):
        """Returns the named parse results as dictionary."""
        return dict(self.items())

    def copy(self):
        """Returns a new copy of a ParseResults object."""
        ret = ParseResults(self.__toklist)
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update(self.__accumNames)
        ret.__name = self.__name
        return ret

    def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict([(v[1], k) for (k, vlist) in self.__tokdict.items()
                           for v in vlist])
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [nl, indent, "<", selfTag, ">"]

        worklist = self.__toklist
        for i, res in enumerate(worklist):
            if isinstance(res, ParseResults):
                if i in namedItems:
                    out += [res.asXML(namedItems[i],
                                      namedItemsOnly and doctag is None,
                                      nextLevelIndent,
                                      formatted)]
                else:
                    out += [res.asXML(None,
                                      namedItemsOnly and doctag is None,
                                      nextLevelIndent,
                                      formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [nl, nextLevelIndent, "<", resTag, ">",
                        xmlBodyText,
                        "</", resTag, ">"]

        out += [nl, indent, "</", selfTag, ">"]
        return "".join(out)

    def __lookup(self, sub):
        """Return the name under which the object sub is stored, or None."""
        for k, vlist in self.__tokdict.items():
            for v, loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               # list(...) because dict views cannot be indexed on Python 3
               list(self.__tokdict.values())[0][0][1] in (0, -1)):
            return list(self.__tokdict.keys())[0]
        else:
            return None

    def dump(self, indent='', depth=0):
        """Diagnostic method for listing out the contents of a ParseResults.
           Accepts an optional indent argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        out.append(indent + _ustr(self.asList()))
        for k, v in sorted(self.items()):
            if out:
                out.append('\n')
            out.append("%s%s- %s: " % (indent, (' ' * depth), k))
            if isinstance(v, ParseResults):
                if v.keys():
                    out.append(v.dump(indent, depth + 1))
                else:
                    out.append(_ustr(v))
            else:
                out.append(_ustr(v))
        return "".join(out)

    # add support for pickle protocol
    def __getstate__(self):
        return (self.__toklist,
                (self.__tokdict.copy(),
                 self.__parent is not None and self.__parent() or None,
                 self.__accumNames,
                 self.__name))

    def __setstate__(self, state):
        self.__toklist = state[0]
        self.__tokdict, \
        par, \
        inAccumNames, \
        self.__name = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __dir__(self):
        # keys() is a view on Python 3 and cannot be added to a list directly
        return dir(super(ParseResults, self)) + list(self.keys())
619
def col(loc, strg):
    """Return the column of position loc within strg; the first column is 1.

    Newlines act as line separators, and a position that sits on a newline
    character is reported as column 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before parsing starts, so positions refer to the tab-expanded text.
    """
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # distance from the last newline before loc (rfind gives -1 if none)
    return loc - strg.rfind("\n", 0, loc)
631
def lineno(loc, strg):
    """Return the line number of position loc within strg; the first line is 1.

    Newlines act as line separators.

    Note: the default parsing behavior is to expand tabs in the input string
    before parsing starts, so positions refer to the tab-expanded text.
    """
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
643
def line(loc, strg):
    """Return the line of text containing position loc within strg,
    counting newlines as line separators.  The returned text does not
    include the terminating newline.
    """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() returns -1 when there is no further newline; index 0 is a valid
    # hit (text starting with a newline), so the test must be >= 0.
    if nextCR >= 0:
        return strg[lastCR + 1:nextCR]
    else:
        return strg[lastCR + 1:]
653
def _defaultStartDebugAction( instring, loc, expr ):
    """Print the expression about to be matched, with its offset and (line,col)."""
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    message = "Match " + _ustr(expr) + " at loc " + _ustr(loc) + position
    print (message)
656
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Print the expression that matched together with the tokens it produced."""
    matched_tokens = str(toks.asList())
    message = "Matched " + _ustr(expr) + " -> " + matched_tokens
    print (message)
659
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Print the exception raised while trying to match an expression."""
    message = "Exception raised:" + _ustr(exc)
    print (message)
662
def nullDebugAction(*args):
    """'Do-nothing' debug action: accepts any positional arguments, ignores
    them and returns None, so that debugging output is suppressed."""
    return None
666
667 class ParserElement(object):
668 """Abstract base level parser element class."""
669 DEFAULT_WHITE_CHARS = " \n\t\r"
670
@staticmethod
def setDefaultWhitespaceChars( chars ):
    """Replace the class-wide default set of whitespace characters, which
    the ParserElement constructor copies into each new element.
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars
676
def __init__( self, savelist=False ):
    """Set every element attribute to its default value.

    savelist is stored as saveAsList.
    """
    # actions and naming
    self.parseAction = []
    self.failAction = None
    #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
    self.strRepr = None
    self.resultsName = None
    self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
    self.saveAsList = savelist
    self.errmsg = ""

    # whitespace and ignorable handling
    self.skipWhitespace = True
    self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    self.copyDefaultWhiteChars = True
    self.keepTabs = False
    self.ignoreExprs = []

    # parsing state and optimisation flags
    self.mayReturnEmpty = False # used when checking for left-recursion
    self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
    self.streamlined = False
    self.re = None
    self.callPreparse = True # used to avoid redundant calls to preParse
    self.callDuringTry = False

    # debugging
    self.debug = False
    self.debugActions = ( None, None, None ) #custom debug actions
699
def copy( self ):
    """Return a shallow copy of this element that owns its own parseAction
    and ignoreExprs lists, so actions can be attached to the copy without
    affecting the original.  When copyDefaultWhiteChars is set, the copy
    takes the current class-wide default whitespace characters.
    """
    duplicate = copy.copy( self )
    duplicate.parseAction = list(self.parseAction)
    duplicate.ignoreExprs = list(self.ignoreExprs)
    if self.copyDefaultWhiteChars:
        duplicate.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    return duplicate
709
def setName( self, name ):
    """Give this expression a name, for use in debugging.

    Also rebuilds the element's error message from the name, updates the
    message of a pre-built exception if the element carries one, and
    returns self so calls can be chained.
    """
    self.name = name
    expected = "Expected " + self.name
    self.errmsg = expected
    if hasattr(self, "exception"):
        self.exception.msg = expected
    return self
717
def setResultsName( self, name, listAllMatches=False ):
    """Attach a results name under which matching tokens can be referenced
    in the returned parse results.

    NOTE: the name is set on a *copy* of this element and the copy is
    returned, so one basic element (an integer, say) can be used in
    several places under different names.  With listAllMatches true the
    copy is marked as non-modal.
    """
    named = self.copy()
    named.resultsName = name
    named.modalResults = not listAllMatches
    return named
729
def setBreak(self, breakFlag=True):
    """Enable (breakFlag true) or disable (breakFlag false) a pdb
    breakpoint that fires just before this element is parsed.

    Enabling wraps the element's _parse method; disabling puts back the
    method remembered on the wrapper, if there is one.  Returns self.
    """
    if not breakFlag:
        if hasattr(self._parse, "_originalParseMethod"):
            self._parse = self._parse._originalParseMethod
        return self

    wrapped = self._parse

    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        pdb.set_trace()
        return wrapped( instring, loc, doActions, callPreParse )

    # remembered so that a later setBreak(False) can undo the wrapping
    breaker._originalParseMethod = wrapped
    self._parse = breaker
    return self
747
@staticmethod
def _normalizeParseActionArgs( f ):
    """Internal method used to decorate parse actions that take fewer than 3 arguments,
       so that all parse actions can be called as f(s,l,t).

       The number of arguments is read from the callable's code object: a
       function or method directly, a class through its __init__, and any
       other callable object through its __call__ method.  A callable that
       accepts *args, or exactly 3 arguments, is returned unchanged;
       otherwise a wrapper taking (s,l,t) and passing on only the trailing
       arguments is returned.  The wrapper receives f's __name__, __doc__
       and __dict__ where these can be copied.
    """
    STAR_ARGS = 4  # co_flags bit that marks a *args parameter

    try:
        restore = None
        if isinstance(f,type):
            # a class used as a parse action: inspect its constructor
            restore = f
            f = f.__init__
        if not _PY3K:
            codeObj = f.func_code
        else:
            # the code object attribute is spelled __code__ on Python 3
            codeObj = f.__code__
        if codeObj.co_flags & STAR_ARGS:
            # hand back the class itself, not its unbound __init__
            if restore is not None:
                return restore
            return f
        numargs = codeObj.co_argcount
        if not _PY3K:
            if hasattr(f,"im_self"):
                numargs -= 1
        else:
            # Bound methods carry __self__.  A constructor looked up on the
            # class is a plain function on Python 3, so its self parameter
            # has to be discounted explicitly.
            if hasattr(f,"__self__") or restore is not None:
                numargs -= 1
        if restore:
            f = restore
    except AttributeError:
        try:
            if not _PY3K:
                call_im_func_code = f.__call__.im_func.func_code
            else:
                call_im_func_code = f.__code__

            # not a function, must be a callable object, get info from the
            # im_func binding of its bound __call__ method
            if call_im_func_code.co_flags & STAR_ARGS:
                return f
            numargs = call_im_func_code.co_argcount
            if not _PY3K:
                if hasattr(f.__call__,"im_self"):
                    numargs -= 1
            # on Python 3 the count is deliberately left as it is here
        except AttributeError:
            if not _PY3K:
                call_func_code = f.__call__.func_code
            else:
                call_func_code = f.__call__.__code__
            # not a bound method, get info directly from __call__ method
            if call_func_code.co_flags & STAR_ARGS:
                return f
            numargs = call_func_code.co_argcount
            if not _PY3K:
                if hasattr(f.__call__,"im_self"):
                    numargs -= 1
            else:
                if hasattr(f.__call__,"__self__"):
                    numargs -= 1

    #~ print ("adding function %s with %d args" % (f.func_name,numargs))
    if numargs == 3:
        return f
    else:
        # NOTE(review): the wrapper built for numargs > 3 is always replaced
        # by the zero-argument wrapper in the final else below, because the
        # two if statements are independent.  Kept as found - confirm the
        # intended behaviour before chaining them with elif.
        if numargs > 3:
            def tmp(s,l,t):
                return f(f.__call__.__self__, s,l,t)
        if numargs == 2:
            def tmp(s,l,t):
                return f(l,t)
        elif numargs == 1:
            def tmp(s,l,t):
                return f(t)
        else: #~ numargs == 0:
            def tmp(s,l,t):
                return f()
        try:
            tmp.__name__ = f.__name__
        except (AttributeError,TypeError):
            # no need for special handling if attribute doesnt exist
            pass
        try:
            tmp.__doc__ = f.__doc__
        except (AttributeError,TypeError):
            # no need for special handling if attribute doesnt exist
            pass
        try:
            tmp.__dict__.update(f.__dict__)
        except (AttributeError,TypeError):
            # no need for special handling if attribute doesnt exist
            pass
        return tmp
841
def setParseAction( self, *fns, **kwargs ):
    """Replace this element's parse actions with fns and return self.

    A parse action is a callable taking 0-3 arguments, called as
    fn(s,loc,toks), fn(loc,toks), fn(toks) or fn(), where:
     - s    = the original string being parsed (see note below)
     - loc  = the location of the matching substring
     - toks = a list of the matched tokens, packaged as a ParseResults object
    An action that modifies the tokens can return them, and the returned
    list replaces the original; otherwise it need not return anything.

    The keyword argument callDuringTry, when given, is stored on the
    element; it is treated as false when absent.

    Note: the default parsing behavior is to expand tabs in the input
    string before parsing starts, so s and loc refer to the tab-expanded
    text.
    """
    normalize = self._normalizeParseActionArgs
    self.parseAction = [normalize(fn) for fn in fns]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
862
def addParseAction( self, *fns, **kwargs ):
    """Append fns to this element's existing parse actions and return self.

    Actions are normalized exactly as in setParseAction.  callDuringTry
    stays set once it has been set; otherwise it takes the value of the
    callDuringTry keyword argument, or False when that is absent.
    """
    normalize = self._normalizeParseActionArgs
    self.parseAction += [normalize(fn) for fn in fns]
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
868
def setFailAction( self, fn ):
    """Register fn as the action to run when parsing fails at this
    expression, and return self.

    fn is called as fn(s,loc,expr,err) where:
     - s    = string being parsed
     - loc  = location where the expression match was attempted and failed
     - expr = the parse expression that failed
     - err  = the exception thrown
    Its return value is not used.  It may raise ParseFatalException to
    stop parsing immediately.
    """
    self.failAction = fn
    return self
881
def _skipIgnorables( self, instring, loc ):
    """Advance loc past every run of ignorable expressions starting there.

    Each expression in self.ignoreExprs is applied repeatedly until it
    raises ParseException; the whole list is retried until a full pass
    matches nothing.  Returns the resulting location.
    """
    while True:
        matchedAny = False
        for ignorable in self.ignoreExprs:
            try:
                # keep consuming until this ignorable stops matching
                while True:
                    loc,dummy = ignorable._parse( instring, loc )
                    matchedAny = True
            except ParseException:
                pass
        if not matchedAny:
            return loc
894
def preParse( self, instring, loc ):
    """Return the location at which matching should start.

    Skips ignorable expressions first (when any are registered), then,
    if skipWhitespace is set, any characters found in self.whiteChars.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables( instring, loc )

    if not self.skipWhitespace:
        return loc

    whitespace = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in whitespace:
        loc += 1
    return loc
906
def parseImpl( self, instring, loc, doActions=True ):
    """Default matcher: consume nothing and produce an empty token list."""
    noTokens = []
    return loc, noTokens
909
def postParse( self, instring, loc, tokenlist ):
    """Hook applied to the tokens returned by parseImpl; this default returns them unchanged."""
    return tokenlist
912
913 #~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Match this element at loc, without memoization.

    Steps: optional preParse, parseImpl, postParse, wrap the tokens in a
    ParseResults, then run the parse actions (when doActions is true or
    self.callDuringTry is set).  Debug actions and the fail action are
    invoked along the way when self.debug / self.failAction are set.

    Returns (new location, ParseResults).  An IndexError escaping parseImpl
    is reported as a ParseException located at the end of instring.
    """
    debugging = ( self.debug ) #and doActions )

    if debugging or self.failAction:
        # slow path: exceptions must be intercepted so the hooks can see them
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        # note: the location handed to actions is the one *before* preParse
        tokensStart = loc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:
            #~ print ("Exception raised:", err)
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = loc
        # only pay for the try block when an IndexError is actually possible
        if self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    # a parse action returning None leaves the tokens as they are
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
985
def tryParse( self, instring, loc ):
    """Attempt a match at loc with parse actions disabled; return the end location.

    A ParseFatalException raised by the attempt is reported as an ordinary
    ParseException positioned at loc.
    """
    try:
        result = self._parse( instring, loc, doActions=False )
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self)
    return result[0]
991
# this method gets repeatedly called during backtracking with the same arguments -
# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Memoizing front end for _parseNoCache.

    Results are keyed on (self, instring, loc, callPreParse, doActions) in
    ParserElement._exprArgCache.  Both successful results and raised
    ParseBaseExceptions are stored; a cached exception is re-raised.

    Returns (location, ParseResults).
    """
    lookup = (self,instring,loc,callPreParse,doActions)
    cache = ParserElement._exprArgCache
    if lookup in cache:
        value = cache[ lookup ]
        if isinstance(value,Exception):
            raise value
        # hand out a copy: a caller mutating the tokens must not alter the
        # cached entry that later lookups will receive
        return (value[0],value[1].copy())
    else:
        try:
            value = self._parseNoCache( instring, loc, doActions, callPreParse )
            # store a private copy; the caller keeps the original object
            cache[ lookup ] = (value[0],value[1].copy())
            return value
        except ParseBaseException as pe:
            cache[ lookup ] = pe
            raise
1009
# parse entry point used by callers; bound to the uncached implementation here
_parse = _parseNoCache

# argument cache for optimizing repeated calls when backtracking through recursive expressions
_exprArgCache = {}
def resetCache():
    """Empty the shared argument cache (ParserElement._exprArgCache)."""
    ParserElement._exprArgCache.clear()
resetCache = staticmethod(resetCache)
1017
_packratEnabled = False
def enablePackrat():
    """Turn on "packrat" parsing, i.e. memoization of parse attempts.

       Repeated parse attempts at the same string location (which happens
       often in many complex grammars) can immediately return a cached value,
       instead of re-executing parsing/validating code.  Memoizing is done of
       both valid results and parsing exceptions.

       This speedup may break existing programs that use parse actions that
       have side-effects.  For this reason, packrat parsing is disabled when
       you first import pyparsing.  To activate the packrat feature, your
       program must call the static method ParserElement.enablePackrat().  If
       your program uses psyco to "compile as you go", you must call
       enablePackrat before calling psyco.full().  If you do not do this,
       Python will crash.  For best results, call enablePackrat() immediately
       after importing pyparsing.

       Calling this more than once has no further effect.
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    # route every _parse call through the caching wrapper
    ParserElement._parse = ParserElement._parseCache
enablePackrat = staticmethod(enablePackrat)
1039
def parseString( self, instring, parseAll=False ):
    """Execute the parse expression with the given string.
       This is the main interface to the client code, once the complete
       expression has been built.

       If you want the grammar to require that the entire input string be
       successfully parsed, then set parseAll to True (equivalent to ending
       the grammar with StringEnd()).

       Note: parseString implicitly calls expandtabs() on the input string,
       in order to report proper column numbers in parse actions.
       If the input string contains tabs and
       the grammar uses parse actions that use the loc argument to index into the
       string being parsed, you can ensure you have a consistent view of the input
       string by:
        - calling parseWithTabs on your grammar before calling parseString
          (see L{I{parseWithTabs}<parseWithTabs>})
        - define your parse action using the full (s,loc,toks) signature, and
          reference the input string using the parse action's s argument
        - explicitly expand the tabs in your input string before calling
          parseString

       Returns the tokens produced by the match; raises the
       ParseBaseException produced by a failed match.
    """
    # cached results from a previous input are not valid for this one
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
        #~ self.saveAsList = True
    for e in self.ignoreExprs:
        e.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        loc, tokens = self._parse( instring, 0 )
        if parseAll:
            # skip trailing whitespace/ignorables, then insist on end of input
            loc = self.preParse( instring, loc )
            StringEnd()._parse( instring, loc )
    except ParseBaseException as exc:
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
    else:
        return tokens
1080
def scanString( self, instring, maxMatches=_MAX_INT ):
    """Scan the input string for expression matches.  Each match will return the
       matching tokens, start location, and end location.  May be called with optional
       maxMatches argument, to clip scanning after 'n' matches are found.

       This is a generator yielding (tokens, start, end) tuples.

       Note that the start and end locations are reported relative to the string
       being parsed.  See L{I{parseString}<parseString>} for more information on parsing
       strings with embedded tabs."""
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind once; these are called on every scan position
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc = preparseFn( instring, loc )
                nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                # no match here; retry one character further on
                loc = preloc+1
            else:
                matches += 1
                yield tokens, preloc, nextLoc
                if nextLoc > preloc:
                    loc = nextLoc
                else:
                    # zero-width match: step forward so the scan cannot
                    # report the same position forever
                    loc = preloc+1
    except ParseBaseException as pe:
        raise pe
1115
def transformString( self, instring ):
    """Extension to scanString, to modify matching text with modified tokens that may
       be returned from a parse action.  To use transformString, define a grammar and
       attach a parse action to it that modifies the returned token list.
       Invoking transformString() on a target string will then scan for matches,
       and replace the matched text patterns according to the logic in the parse
       action.  transformString() returns the resulting transformed string.

       Side effect: sets self.keepTabs to True, and leaves it set."""
    out = []
    lastE = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for t,s,e in self.scanString( instring ):
            # unmatched text between the previous match and this one is copied through
            out.append( instring[lastE:s] )
            if t:
                if isinstance(t,ParseResults):
                    out += t.asList()
                elif isinstance(t,list):
                    out += t
                else:
                    out.append(t)
            lastE = e
        out.append(instring[lastE:])
        return "".join(map(_ustr,out))
    except ParseBaseException as pe:
        raise pe
1143
def searchString( self, instring, maxMatches=_MAX_INT ):
    """Another extension to scanString, simplifying the access to the tokens found
       to match the given parse expression.  May be called with optional
       maxMatches argument, to clip searching after 'n' matches are found.

       Returns a ParseResults holding the tokens of each match, in scan order.
    """
    try:
        # only the tokens are kept; start/end locations are discarded
        return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
    except ParseBaseException as pe:
        raise pe
1153
def __add__(self, other ):
    """Implementation of + operator - returns And

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None."""
    rhs = other
    if isinstance( rhs, basestring ):
        rhs = Literal( rhs )
    if isinstance( rhs, ParserElement ):
        return And( [ self, rhs ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None
1163
def __radd__(self, other ):
    """Implementation of + operator when left operand is not a ParserElement

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None."""
    lhs = other
    if isinstance( lhs, basestring ):
        lhs = Literal( lhs )
    if isinstance( lhs, ParserElement ):
        return lhs + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None
1173
def __sub__(self, other):
    """Implementation of - operator, returns And with error stop

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None."""
    rhs = other
    if isinstance( rhs, basestring ):
        rhs = Literal( rhs )
    if isinstance( rhs, ParserElement ):
        return And( [ self, And._ErrorStop(), rhs ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None
1183
def __rsub__(self, other ):
    """Implementation of - operator when left operand is not a ParserElement

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None."""
    lhs = other
    if isinstance( lhs, basestring ):
        lhs = Literal( lhs )
    if isinstance( lhs, ParserElement ):
        return lhs - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None
1193
def __mul__(self,other):
    """Implementation of * operator - repeat this expression.

    other may be:
     - an int n: exactly n repetitions
     - a tuple (min, max) of ints: between min and max repetitions
     - a tuple (min, None) or (min,): min or more repetitions
     - a tuple (None, max): same as (0, max)

    Raises TypeError for any other operand type, and ValueError for a
    negative count, for max < min, or for a total of zero repetitions.
    """
    if isinstance(other,int):
        minElements, optElements = other,0
    elif isinstance(other,tuple):
        # pad to exactly two entries so (n,) behaves like (n, None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0],int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0],int) and isinstance(other[1],int):
            minElements, optElements = other
            # from here on optElements counts only the optional repetitions
            optElements -= minElements
        else:
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]), type(other[1])))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % (type(other),))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # nested Optionals: each further repetition is only tried
            # when the one before it matched
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
1242
def __rmul__(self, other):
    """Implementation of * operator when the count is the left operand; same result as __mul__."""
    product = self.__mul__(other)
    return product
1245
def __or__(self, other ):
    """Implementation of | operator - returns MatchFirst

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None."""
    rhs = other
    if isinstance( rhs, basestring ):
        rhs = Literal( rhs )
    if isinstance( rhs, ParserElement ):
        return MatchFirst( [ self, rhs ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None
1255
def __ror__(self, other ):
    """Implementation of | operator when left operand is not a ParserElement

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None."""
    lhs = other
    if isinstance( lhs, basestring ):
        lhs = Literal( lhs )
    if isinstance( lhs, ParserElement ):
        return lhs | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None
1265
def __xor__(self, other ):
    """Implementation of ^ operator - returns Or

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None."""
    rhs = other
    if isinstance( rhs, basestring ):
        rhs = Literal( rhs )
    if isinstance( rhs, ParserElement ):
        return Or( [ self, rhs ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None
1275
def __rxor__(self, other ):
    """Implementation of ^ operator when left operand is not a ParserElement

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None."""
    lhs = other
    if isinstance( lhs, basestring ):
        lhs = Literal( lhs )
    if isinstance( lhs, ParserElement ):
        return lhs ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None
1285
def __and__(self, other ):
    """Implementation of & operator - returns Each

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None."""
    rhs = other
    if isinstance( rhs, basestring ):
        rhs = Literal( rhs )
    if isinstance( rhs, ParserElement ):
        return Each( [ self, rhs ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None
1295
def __rand__(self, other ):
    """Implementation of & operator when left operand is not a ParserElement

    A string operand is wrapped in a Literal.  Any other non-ParserElement
    operand triggers a SyntaxWarning and the result is None."""
    lhs = other
    if isinstance( lhs, basestring ):
        lhs = Literal( lhs )
    if isinstance( lhs, ParserElement ):
        return lhs & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None
1305
def __invert__( self ):
    """Implementation of ~ operator - wraps this element in a NotAny"""
    negated = NotAny( self )
    return negated
1309
def __call__(self, name):
    """Calling an element is shorthand for setResultsName(name), with
       listAllMatches left at its default::
         userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
       could be written as::
         userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
       """
    named = self.setResultsName(name)
    return named
1317
def suppress( self ):
    """Return this ParserElement wrapped in a Suppress, so its output is
       dropped; useful to keep punctuation from cluttering up returned output.
    """
    wrapped = Suppress( self )
    return wrapped
1323
def leaveWhitespace( self ):
    """Turn off whitespace skipping ahead of this element's pattern.

       This is normally only used internally by the pyparsing module, but
       may be needed in some whitespace-sensitive grammars.  Returns self.
    """
    setattr( self, "skipWhitespace", False )
    return self
1331
def setWhitespaceChars( self, chars ):
    """Use chars as this element's whitespace set, replacing the default.

       Whitespace skipping is switched on and copyDefaultWhiteChars is
       cleared.  Returns self.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    self.copyDefaultWhiteChars = False
    return self
1339
def parseWithTabs( self ):
    """Keep <TAB> characters in the input instead of expanding them to spaces
       before parsing (the default behavior).
       Must be called before parseString when the input grammar contains elements that
       match <TAB> characters.  Returns self."""
    setattr( self, "keepTabs", True )
    return self
1346
def ignore( self, other ):
    """Register an expression to be skipped (e.g., comments) during pattern
       matching; may be called repeatedly, to define multiple comment or other
       ignorable patterns.

       An expression that is not already a Suppress is wrapped in one.  A
       Suppress that is already registered is not added again.  Returns self.
    """
    ignorables = self.ignoreExprs
    if not isinstance( other, Suppress ):
        ignorables.append( Suppress( other ) )
    elif other not in ignorables:
        ignorables.append( other )
    return self
1358
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Turn on debugging and install the three debug callbacks.

    Any callback passed as a false value is replaced by the matching
    module default.  Returns self."""
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
1366
def setDebug( self, flag=True ):
    """Switch debugging messages for pattern matching on or off.
       A true flag installs the default debug actions; a false flag clears
       self.debug.  Returns self."""
    if not flag:
        self.debug = False
        return self
    self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
    return self
1375
def __str__( self ):
    """Return the element's name attribute."""
    label = self.name
    return label
1378
def __repr__( self ):
    """Return the same text as the string conversion, obtained via _ustr."""
    text = _ustr(self)
    return text
1381
def streamline( self ):
    """Mark this element as streamlined and drop its cached string form.  Returns self."""
    self.strRepr = None
    self.streamlined = True
    return self
1386
def checkRecursion( self, parseElementList ):
    """Recursion check hook; this implementation does nothing."""
    return None
1389
def validate( self, validateTrace=[] ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

    validateTrace is not used here; the recursion check always starts
    from a fresh empty list."""
    emptyTrace = []
    self.checkRecursion( emptyTrace )
1393
def parseFile( self, file_or_filename, parseAll=False ):
    """Execute the parse expression on the given file or filename.
       If a filename is specified (instead of a file object),
       the entire file is opened, read, and closed before parsing.

       Anything without a usable read() method is treated as a filename and
       opened in "rb" mode.  parseAll is passed through to parseString.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        f = open(file_or_filename, "rb")
        try:
            file_contents = f.read()
        finally:
            # close even when the read fails, so the handle is not leaked
            f.close()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException as exc:
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1410
def getException(self):
    """Build a ParseException for this element with an empty string, location 0 and self.errmsg."""
    exc = ParseException("",0,self.errmsg,self)
    return exc
1413
def __getattr__(self,aname):
    """Create the myException attribute on first access.

    The exception comes from getException() and is stored on the instance.
    Any other name raises AttributeError."""
    if aname != "myException":
        raise AttributeError("no such attribute " + aname)
    exc = self.getException()
    self.myException = exc
    return exc
1420
def __eq__(self,other):
    """Equality test.

    Against another ParserElement: true for the same object or for equal
    instance dictionaries.  Against a string: true when this element
    parses the whole string.  Anything else falls through to comparing
    the super() proxy with other."""
    if isinstance(other, ParserElement):
        if self is other:
            return True
        return self.__dict__ == other.__dict__
    if isinstance(other, basestring):
        try:
            self.parseString(_ustr(other), parseAll=True)
        except ParseBaseException:
            return False
        return True
    return super(ParserElement,self)==other
1432
def __ne__(self,other):
    """Inequality is the negation of ==."""
    if self == other:
        return False
    return True
1435
def __hash__(self):
    """Hash on object identity."""
    identity = id(self)
    return hash(identity)
1438
def __req__(self,other):
    """Reflected equality; same result as self == other."""
    same = (self == other)
    return same
1441
def __rne__(self,other):
    """Reflected inequality; the negation of self == other."""
    if self == other:
        return False
    return True
1444
1445
class Token(ParserElement):
    """Abstract ParserElement subclass, for defining atomic matching patterns."""
    def __init__( self ):
        # tokens are created with savelist switched off
        super(Token,self).__init__( savelist=False )

    def setName(self, name):
        """Set the name via the base class and refresh errmsg to "Expected <name>".

        Returns whatever the base class setName returned."""
        result = super(Token,self).setName(name)
        self.errmsg = "Expected " + self.name
        return result
1457
1458
class Empty(Token):
    """An empty token, will always match."""
    def __init__( self ):
        super(Empty,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"
1466
1467
class NoMatch(Token):
    """A token that will never match."""
    def __init__( self ):
        super(NoMatch,self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Always fail: raise this element's exception, positioned at loc in instring."""
        failure = self.myException
        failure.pstr = instring
        failure.loc = loc
        raise failure
1483
1484
class Literal(Token):
    """Token to exactly match a specified string."""
    def __init__( self, matchString ):
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            firstChar = matchString[0]
        except IndexError:
            # an empty literal is turned into an Empty instance in place
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        else:
            self.firstMatchChar = firstChar
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    def parseImpl( self, instring, loc, doActions=True ):
        """Match self.match at loc; return (end location, matched string) or raise."""
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith(self.match,loc):
                return loc+self.matchLen, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
_L = Literal
1517
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with Literal::
         Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
         Keyword("if") will not; it will only match the leading 'if in 'if x=1', or 'if(y==2)'
       Accepts two optional constructor arguments in addition to the keyword string:
       identChars is a string of characters that would be valid identifier characters,
       defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive
       matching, default is False.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
        super(Keyword,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            firstChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        else:
            self.firstMatchChar = firstChar
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are done on upper-cased text throughout
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at loc, requiring that the characters just before
           and just after it are not identifier characters.

           Returns (end location, self.match) or raises this element's exception."""
        matchLen = self.matchLen
        endloc = loc+matchLen
        if self.caseless:
            found = ( (instring[ loc:endloc ].upper() == self.caselessmatch) and
                      (loc >= len(instring)-matchLen or instring[endloc].upper() not in self.identChars) and
                      (loc == 0 or instring[loc-1].upper() not in self.identChars) )
        else:
            found = ( instring[loc] == self.firstMatchChar and
                      (matchLen==1 or instring.startswith(self.match,loc)) and
                      (loc >= len(instring)-matchLen or instring[endloc] not in self.identChars) and
                      (loc == 0 or instring[loc-1] not in self.identChars) )
        if found:
            return endloc, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def copy(self):
        """Copy via the base class, then set identChars to the class default."""
        # NOTE(review): this replaces any custom identChars with the default
        # character string -- confirm that is intended
        duplicate = super(Keyword,self).copy()
        duplicate.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return duplicate

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
1578
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """
    def __init__( self, matchString ):
        # the base class stores the upper-cased form for comparison
        super(CaselessLiteral,self).__init__( matchString.upper() )
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl( self, instring, loc, doActions=True ):
        """Compare the upper-cased input slice with the stored match string.

           Returns (end location, the defining literal) or raises this element's exception."""
        endloc = loc+self.matchLen
        candidate = instring[ loc:endloc ].upper()
        if candidate == self.match:
            return endloc, self.returnString
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1600
class CaselessKeyword(Keyword):
    """Keyword that is always constructed with caseless=True."""
    def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at loc ignoring case; the following character must
           not be an identifier character.  The preceding character is not checked here.

           Returns (end location, self.match) or raises this element's exception."""
        matchLen = self.matchLen
        endloc = loc+matchLen
        found = ( (instring[ loc:endloc ].upper() == self.caselessmatch) and
                  (loc >= len(instring)-matchLen or instring[endloc].upper() not in self.identChars) )
        if found:
            return endloc, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1614
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.  The default value for min is 1 (a
       minimum value < 1 is not valid); the default values for max and exact
       are 0, meaning no maximum or exact length restriction.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):
        super(Word,self).__init__()
        self.initCharsOrig = initChars
        self.initChars = _str2dict(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = _str2dict(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = _str2dict(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # fast path: with default length limits and no space in either
        # character set, the whole word can be matched by one regex
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # a single initial character can be emitted as an escaped
                # literal; a longer initChars is a *set* of alternatives and
                # must stay a character class (handled by the branch below)
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # fall back to the character-by-character matcher
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a word at loc; return (end location, matched text) or raise
           this element's exception."""
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc,result.group()

        if not(instring[ loc ] in self.initChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        # never scan beyond maxLen characters or the end of the input
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit max, a word that would continue past it is rejected
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the base-class string if that succeeds, otherwise a
           "W:(...)" summary of the character sets (cached in strRepr)."""
        try:
            return super(Word,self).__str__()
        except Exception:
            pass


        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets to their first four characters
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
1741
1742
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    def __init__( self, pattern, flags=0):
        """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags."""
        super(Regex,self).__init__()

        if not len(pattern):
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            compiled = re.compile(self.pattern, self.flags)
        except sre_constants.error:
            # warn about the bad pattern, then let the original error propagate
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise
        self.re = compiled
        self.reString = self.pattern

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Apply the compiled pattern at loc.

           Returns (end location, ParseResults) where the results hold the
           matched text plus one named entry per named group in the pattern.
           Raises this element's exception when there is no match."""
        found = self.re.match(instring,loc)
        if not found:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        named = found.groupdict()
        ret = ParseResults(found.group())
        for key in named:
            ret[key] = named[key]
        return found.end(),ret

    def __str__( self ):
        """Return the base-class string if that succeeds, otherwise a
           "Re:(...)" form of the pattern (cached in strRepr)."""
        try:
            return super(Regex,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
1798
1799
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """

    def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=False)
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)

           Raises SyntaxError (after a SyntaxWarning) if quoteChar or
           endQuoteChar is empty once surrounding whitespace is stripped, and
           re-raises sre_constants.error if the generated pattern does not
           compile.
        """
        super(QuotedString, self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string", SyntaxWarning, stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # The body is a repetition of alternatives; the first alternative is
        # "any character that cannot start the end quote, is not the escape
        # character and (unless multiline) is not a line break".
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            lineBreaks = ''
        else:
            self.flags = 0
            lineBreaks = r'\n\r'
        if escChar is not None:
            escRange = _escapeRegexRangeChars(escChar)
        else:
            escRange = ''
        self.pattern = r'%s(?:[^%s%s%s]' % (
            re.escape(self.quoteChar),
            _escapeRegexRangeChars(self.endQuoteChar[0]),
            lineBreaks,
            escRange)

        if len(self.endQuoteChar) > 1:
            # allow proper prefixes of a multi-character end quote inside the
            # body, as long as the next character breaks the end quote
            self.pattern += (
                '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                                     _escapeRegexRangeChars(self.endQuoteChar[i]))
                                       for i in range(len(self.endQuoteChar)-1, 0, -1)]) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar) + "(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                          SyntaxWarning, stacklevel=2)
            raise

        # the name is derived from __str__, which reads the quote attributes
        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match a quoted string at loc.

        Returns (new location, matched text).  When unquoteResults is set the
        delimiters are removed, escChar-escaped characters are replaced by the
        escaped character, and escQuote sequences are replaced by
        endQuoteChar.  On failure the shared self.myException is updated and
        raised.
        """
        # cheap first-character test before running the regular expression
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring, loc)
        else:
            result = None
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret, basestring):
                # replace escaped characters; the template is a raw string so
                # that "\g" is not treated as a (invalid) string escape
                if self.escChar:
                    ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__(self):
        """Return the inherited string form, or a cached description of the quote delimiters."""
        try:
            return super(QuotedString, self).__str__()
        except Exception:
            # fall back to a generated description when the inherited
            # __str__ cannot produce one
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
1914
1915
class CharsNotIn(Token):
    """Token matching a run of characters that are *not* in a given set.

    Constructed from a string of disallowed characters plus optional minimum,
    maximum and/or exact lengths.  min defaults to 1 and may not be smaller;
    max and exact default to 0, meaning "no restriction".
    """

    def __init__(self, notChars, min=1, max=0, exact=0):
        """Store the excluded characters and the length limits.

        Raises ValueError if min is less than 1.  A positive exact overrides
        both min and max.
        """
        super(CharsNotIn, self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT
        if exact > 0:
            self.minLen = exact
            self.maxLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = (self.minLen == 0)
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Consume characters from loc until an excluded character or maxLen.

        Returns (new location, matched text).  Raises the shared
        self.myException if the first character is excluded or fewer than
        minLen characters were consumed.
        """
        excluded = self.notChars
        if instring[loc] in excluded:
            failure = self.myException
            failure.pstr = instring
            failure.loc = loc
            raise failure

        start = loc
        limit = min(start + self.maxLen, len(instring))
        loc += 1
        while loc < limit:
            if instring[loc] in excluded:
                break
            loc += 1

        if loc - start < self.minLen:
            failure = self.myException
            failure.pstr = instring
            failure.loc = loc
            raise failure

        return loc, instring[start:loc]

    def __str__(self):
        """Return the inherited string form, or a cached "!W:(chars)" fallback."""
        try:
            return super(CharsNotIn, self).__str__()
        except:
            # deliberately broad: any failure selects the generated description
            pass

        if self.strRepr is None:
            if len(self.notChars) <= 4:
                self.strRepr = "!W:(%s)" % self.notChars
            else:
                # only the first four excluded characters are shown
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
        return self.strRepr
1986
class White(Token):
    """Token that explicitly matches whitespace.

    Whitespace is normally skipped by pyparsing grammars; use this class when
    a whitespace structure is significant.  It is defined with a string of the
    whitespace characters to match (default: space, tab, carriage return and
    newline) and optional min, max and exact lengths as for the Word class.
    """
    # display labels used to build the expression name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }

    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        """Store the characters to match and the length limits.

        The characters being matched are removed from this element's own
        skippable whitespace set.  Every character of ws must have an entry
        in whiteStrs, since the name is built from those labels.
        """
        super(White, self).__init__()
        self.matchWhite = ws
        skippable = [ch for ch in self.whiteChars if ch not in self.matchWhite]
        self.setWhitespaceChars("".join(skippable))
        labels = [White.whiteStrs[ch] for ch in self.matchWhite]
        self.name = "".join(labels)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT
        if exact > 0:
            self.minLen = exact
            self.maxLen = exact

    def parseImpl(self, instring, loc, doActions=True):
        """Consume matching whitespace starting at loc.

        Returns (new location, matched text).  Raises the shared
        self.myException if the first character does not match or fewer than
        minLen characters were consumed.
        """
        if instring[loc] not in self.matchWhite:
            failure = self.myException
            failure.pstr = instring
            failure.loc = loc
            raise failure

        start = loc
        limit = min(start + self.maxLen, len(instring))
        loc += 1
        while loc < limit:
            if instring[loc] not in self.matchWhite:
                break
            loc += 1

        if loc - start < self.minLen:
            failure = self.myException
            failure.pstr = instring
            failure.loc = loc
            raise failure

        return loc, instring[start:loc]
2043
2044
class _PositionToken(Token):
    """Common base for the position-testing tokens defined after it."""

    def __init__(self):
        """Name the element after its concrete class and set its parse flags."""
        super(_PositionToken, self).__init__()
        className = self.__class__.__name__
        self.name = className
        self.mayIndexError = False
        self.mayReturnEmpty = True
2051
class GoToColumn(_PositionToken):
    """Token that advances to a given column of the input text.

    Useful for scraping tabular reports.
    """

    def __init__(self, colno):
        """Remember the target column number colno."""
        super(GoToColumn, self).__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """Skip ignorables and whitespace until the target column is reached.

        Returns the possibly advanced location; loc is returned unchanged if
        it is already at the target column.
        """
        target = self.col
        if col(loc, instring) == target:
            return loc

        end = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        while loc < end and instring[loc].isspace():
            if col(loc, instring) == target:
                break
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return the text between loc and the target column.

        Raises ParseException if loc is already past the target column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        stop = loc + self.col - current
        return stop, instring[loc:stop]
2074
class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""

    def __init__(self):
        """Treat newline as significant by removing it from the skipped whitespace."""
        super(LineStart, self).__init__()
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n", "")
        self.setWhitespaceChars(nonNewlineWhite)
        self.errmsg = "Expected start of line"

    def preParse(self, instring, loc):
        """Return loc, advanced by one when the inherited preParse stops on a newline."""
        skipped = super(LineStart, self).preParse(instring, loc)
        if instring[skipped] != "\n":
            return loc
        return loc + 1

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens when loc is at the start of a line.

        That is the case when loc is 0, when loc equals the result of
        pre-parsing from position 0, or when the preceding character is a
        newline.  Otherwise the shared self.myException is raised.
        """
        atLineStart = (loc == 0
                       or loc == self.preParse(instring, 0)
                       or instring[loc-1] == "\n")
        if atLineStart:
            return loc, []

        failure = self.myException
        failure.pstr = instring
        failure.loc = loc
        raise failure
2099
class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the parse string"""

    def __init__(self):
        """Treat newline as significant by removing it from the skipped whitespace."""
        super(LineEnd, self).__init__()
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n", "")
        self.setWhitespaceChars(nonNewlineWhite)
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Match a newline at loc, or the end of the input string.

        Returns (loc+1, "\\n") for a newline and (loc+1, []) exactly at the
        end of the string.  Any other position raises the shared
        self.myException.
        """
        end = len(instring)
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"
        if loc == end:
            return loc + 1, []

        # either a non-newline character, or a location past the end
        failure = self.myException
        failure.pstr = instring
        failure.loc = loc
        raise failure
2125
class StringStart(_PositionToken):
    """Matches if current position is at the beginning of the parse string"""

    def __init__(self):
        """Set the error message reported when the match fails."""
        super(StringStart, self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens at position 0, or at the position reached by
        pre-parsing from position 0 (i.e. everything before loc is skippable).
        Otherwise raise the shared self.myException.
        """
        if loc != 0 and loc != self.preParse(instring, 0):
            failure = self.myException
            failure.pstr = instring
            failure.loc = loc
            raise failure
        return loc, []
2143
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""

    def __init__(self):
        """Set the error message reported when the match fails."""
        super(StringEnd, self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens when loc is at or beyond the end of instring.

        Exactly at the end the returned location is loc+1; beyond the end loc
        is returned unchanged.  Before the end the shared self.myException is
        updated with the location and input string and raised.
        """
        end = len(instring)
        if loc < end:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        if loc == end:
            return loc + 1, []
        # loc > end is the only remaining case; the former trailing "else"
        # that raised could never be reached and has been removed
        return loc, []
2167
class WordStart(_PositionToken):
    r"""Matches if the current position is at the beginning of a Word, and
       is not preceded by any character in a given set of wordChars
       (default=printables). To emulate the \b behavior of regular expressions,
       use WordStart(alphanums). WordStart will also match at the beginning of
       the string being parsed, or at the beginning of a line.
    """

    def __init__(self, wordChars = printables):
        """Store the set of characters that count as part of a word."""
        super(WordStart, self).__init__()
        self.wordChars = _str2dict(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens at position 0, or where the previous
        character is not a word character and the current one is.
        Otherwise raise the shared self.myException.
        """
        if loc == 0:
            return loc, []

        wordChars = self.wordChars
        if instring[loc-1] in wordChars or instring[loc] not in wordChars:
            failure = self.myException
            failure.pstr = instring
            failure.loc = loc
            raise failure
        return loc, []
2189
class WordEnd(_PositionToken):
    r"""Matches if the current position is at the end of a Word, and
       is not followed by any character in a given set of wordChars
       (default=printables). To emulate the \b behavior of regular expressions,
       use WordEnd(alphanums). WordEnd will also match at the end of
       the string being parsed, or at the end of a line.
    """

    def __init__(self, wordChars = printables):
        """Store the set of word characters; leading whitespace is not skipped."""
        super(WordEnd, self).__init__()
        self.wordChars = _str2dict(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens when loc is not inside the string, or where
        the previous character is a word character and the current one is not.
        Otherwise raise the shared self.myException.
        """
        length = len(instring)
        insideString = length > 0 and loc < length
        if not insideString:
            return loc, []

        wordChars = self.wordChars
        if instring[loc] in wordChars or instring[loc-1] not in wordChars:
            failure = self.myException
            failure.pstr = instring
            failure.loc = loc
            raise failure
        return loc, []
2214
2215
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""

    def __init__( self, exprs, savelist = False ):
        """Store the sub-expressions as a list in self.exprs.

        A list is stored as given (not copied), a string becomes a single
        Literal, any other iterable is converted with list(), and a
        non-iterable value is wrapped in a one-element list.
        """
        super(ParseExpression, self).__init__(savelist)
        if isinstance(exprs, list):
            contained = exprs
        elif isinstance(exprs, basestring):
            contained = [Literal(exprs)]
        else:
            try:
                contained = list(exprs)
            except TypeError:
                contained = [exprs]
        self.exprs = contained
        self.callPreparse = False

    def __getitem__( self, i ):
        """Return the i-th contained expression."""
        return self.exprs[i]

    def append( self, other ):
        """Add other to the contained expressions, reset the cached string form, and return self."""
        self.exprs.append(other)
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so expressions shared with other grammars are untouched
        copies = [expr.copy() for expr in self.exprs]
        self.exprs = copies
        for expr in copies:
            expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register other as ignorable here and on every contained expression.

        A Suppress instance that is already registered is not added again.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self
        super(ParseExpression, self).ignore(other)
        for expr in self.exprs:
            expr.ignore(self.ignoreExprs[-1])
        return self

    def __str__( self ):
        """Return the inherited string form, or a cached "ClassName:(exprs)" fallback."""
        try:
            return super(ParseExpression, self).__str__()
        except:
            # deliberately broad: any failure selects the generated description
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """Streamline the contained expressions and flatten nested expressions of the same class."""
        super(ParseExpression, self).streamline()

        for expr in self.exprs:
            expr.streamline()

        def mergeable(candidate):
            # a nested expression may be folded in only if it is of our own
            # class and has no parse actions, results name or debug flag
            return (isinstance(candidate, self.__class__) and
                    not candidate.parseAction and
                    candidate.resultsName is None and
                    not candidate.debug)

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # (likewise for Or's and MatchFirst's)
        if len(self.exprs) == 2:
            first = self.exprs[0]
            if mergeable(first):
                self.exprs = first.exprs[:] + [self.exprs[1]]
                self.strRepr = None
                self.mayReturnEmpty |= first.mayReturnEmpty
                self.mayIndexError |= first.mayIndexError

            last = self.exprs[-1]
            if mergeable(last):
                self.exprs = self.exprs[:-1] + last.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= last.mayReturnEmpty
                self.mayIndexError |= last.mayIndexError

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the inherited setResultsName and return its result."""
        return super(ParseExpression, self).setResultsName(name, listAllMatches)

    def validate( self, validateTrace=[] ):
        """Validate every contained expression, then run the recursion check on self."""
        # validateTrace is only read, never modified
        trace = validateTrace[:] + [self]
        for expr in self.exprs:
            expr.validate(trace)
        self.checkRecursion([])
2311
class And(ParseExpression):
    """Requires all given ParseExpressions to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the '+' operator.
    """

    class _ErrorStop(Empty):
        """Marker element; once parseImpl passes one, later failures are
        raised as ParseSyntaxException."""
        def __init__(self, *args, **kwargs):
            # NOTE(review): super(Empty, ...) starts the lookup *after* Empty,
            # so Empty.__init__ itself is skipped - confirm this is intended.
            super(Empty,self).__init__(*args, **kwargs)
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        """Store the expressions and take whitespace handling from the first one.

        The element may return empty only if every contained expression may.
        """
        super(And,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        # Use the normalised list built by the base class: the raw argument
        # may be a string or an iterator, which cannot be indexed this way.
        first = self.exprs[0]
        self.setWhitespaceChars( first.whiteChars )
        self.skipWhitespace = first.skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse every contained expression in order and accumulate the tokens.

        Returns (new location, accumulated results).  After an _ErrorStop
        marker, ParseBaseException and IndexError failures are converted to
        ParseSyntaxException.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    raise ParseSyntaxException(pe)
                except IndexError:
                    raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
            else:
                loc, exprtokens = e._parse( instring, loc, doActions )
            # keep results that carry tokens or named entries
            if exprtokens or exprtokens.keys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Implement '+=': append other (a string becomes a Literal) and return self."""
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        """Check contained expressions for recursion, stopping after the first
        one that cannot return empty."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        """Return self.name if set, else a cached "{a b c}" description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr
2378
2379
class Or(ParseExpression):
    """Requires that at least one ParseExpression is found.
       If two expressions match, the expression that matches the longest string will be used.
       May be constructed using the '^' operator.
    """

    def __init__( self, exprs, savelist = False ):
        """Store the alternatives; the element may return empty if any alternative may."""
        super(Or, self).__init__(exprs, savelist)
        self.mayReturnEmpty = any(alt.mayReturnEmpty for alt in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Try every alternative and parse with the one that reaches furthest.

        Each alternative is first probed with tryParse; the first alternative
        with the greatest end location is then parsed for real.  If none
        matches, the failure with the greatest location is raised, or a
        ParseException if no failure was recorded.
        """
        furthestFailLoc = -1
        furthestFailure = None
        bestLoc = -1
        bestExpr = None
        inputLen = len(instring)

        for alt in self.exprs:
            try:
                endLoc = alt.tryParse(instring, loc)
            except ParseException as err:
                if err.loc > furthestFailLoc:
                    furthestFailure = err
                    furthestFailLoc = err.loc
                continue
            except IndexError:
                # running off the end counts as a failure at end of input
                if inputLen > furthestFailLoc:
                    furthestFailure = ParseException(instring, inputLen, alt.errmsg, self)
                    furthestFailLoc = inputLen
                continue
            if endLoc > bestLoc:
                bestLoc = endLoc
                bestExpr = alt

        if bestLoc >= 0:
            return bestExpr._parse(instring, loc, doActions)
        if furthestFailure is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        raise furthestFailure

    def __ixor__(self, other ):
        """Implement '^=': append other (a string becomes a Literal) and return self."""
        alternative = Literal(other) if isinstance(other, basestring) else other
        return self.append(alternative)

    def __str__( self ):
        """Return self.name if set, else a cached "{a ^ b ^ c}" description."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(alt) for alt in self.exprs]
            self.strRepr = "{" + " ^ ".join(parts) + "}"
        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        trail = parseElementList[:] + [self]
        for alt in self.exprs:
            alt.checkRecursion(trail)
2439
2440
class MatchFirst(ParseExpression):
    """Requires that at least one ParseExpression is found.
       If two expressions match, the first one listed is the one that will match.
       May be constructed using the '|' operator.
    """

    def __init__( self, exprs, savelist = False ):
        """Store the alternatives.

        The element may return empty if any alternative may, or if exprs is
        empty.
        """
        super(MatchFirst, self).__init__(exprs, savelist)
        if not exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(alt.mayReturnEmpty for alt in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the result of the first alternative that parses at loc.

        If none matches, the failure with the greatest location is raised, or
        a ParseException if no failure was recorded.
        """
        furthestFailLoc = -1
        furthestFailure = None
        inputLen = len(instring)

        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, doActions)
            except ParseException as err:
                if err.loc > furthestFailLoc:
                    furthestFailure = err
                    furthestFailLoc = err.loc
            except IndexError:
                # running off the end counts as a failure at end of input
                if inputLen > furthestFailLoc:
                    furthestFailure = ParseException(instring, inputLen, alt.errmsg, self)
                    furthestFailLoc = inputLen

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestFailure is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        raise furthestFailure

    def __ior__(self, other ):
        """Implement '|=': append other (a string becomes a Literal) and return self."""
        alternative = Literal(other) if isinstance(other, basestring) else other
        return self.append(alternative)

    def __str__( self ):
        """Return self.name if set, else a cached "{a | b | c}" description."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(alt) for alt in self.exprs]
            self.strRepr = "{" + " | ".join(parts) + "}"
        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        trail = parseElementList[:] + [self]
        for alt in self.exprs:
            alt.checkRecursion(trail)
2498
2499
class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the '&' operator.
    """

    def __init__( self, exprs, savelist = True ):
        """Store the expressions; the element may return empty only if all of them may."""
        super(Each, self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(expr.mayReturnEmpty for expr in self.exprs)
        self.skipWhitespace = True
        # the expression groups are built lazily on the first parseImpl call
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the contained expressions in whatever order the input has them.

        A first pass probes with tryParse to find a matching order; a second
        pass parses for real in that order and merges the results.  Raises
        ParseException if a required expression never matched.
        """
        if self.initExprGroups:
            self.optionals = [e.expr for e in self.exprs if isinstance(e, Optional)]
            self.multioptionals = [e.expr for e in self.exprs if isinstance(e, ZeroOrMore)]
            self.multirequired = [e.expr for e in self.exprs if isinstance(e, OneOrMore)]
            self.required = [e for e in self.exprs
                             if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))]
            self.required += self.multirequired
            self.initExprGroups = False

        probeLoc = loc
        pendingRequired = self.required[:]
        pendingOptional = self.optionals[:]
        matchOrder = []

        # keep sweeping over the candidates until a full sweep matches nothing
        while True:
            candidates = pendingRequired + pendingOptional + self.multioptionals + self.multirequired
            failures = 0
            for cand in candidates:
                try:
                    probeLoc = cand.tryParse(instring, probeLoc)
                except ParseException:
                    failures += 1
                    continue
                matchOrder.append(cand)
                if cand in pendingRequired:
                    pendingRequired.remove(cand)
                elif cand in pendingOptional:
                    pendingOptional.remove(cand)
            if failures == len(candidates):
                break

        if pendingRequired:
            missing = ", ".join([_ustr(e) for e in pendingRequired])
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs
                       if isinstance(e, Optional) and e.expr in pendingOptional]

        parsed = []
        for expr in matchOrder:
            loc, tokens = expr._parse(instring, loc, doActions)
            parsed.append(tokens)

        finalResults = ParseResults([])
        for tokens in parsed:
            # names already present are combined and re-assigned after the merge
            combined = {}
            for key in tokens.keys():
                if key in finalResults.keys():
                    merged = ParseResults(finalResults[key])
                    merged += ParseResults(tokens[key])
                    combined[key] = merged
            finalResults += ParseResults(tokens)
            for key, value in combined.items():
                finalResults[key] = value
        return loc, finalResults

    def __str__( self ):
        """Return self.name if set, else a cached "{a & b & c}" description."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(expr) for expr in self.exprs]
            self.strRepr = "{" + " & ".join(parts) + "}"
        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every contained expression."""
        trail = parseElementList[:] + [self]
        for expr in self.exprs:
            expr.checkRecursion(trail)
2584
2585
class ParseElementEnhance(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""

    def __init__( self, expr, savelist=False ):
        """Wrap a single expression; a string is converted to a Literal.

        When expr is not None, its parse flags, whitespace settings and
        ignore expressions are copied onto this element.
        """
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate to the wrapped expression; raise ParseException if there is none."""
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """Disable whitespace skipping here and on a private copy of the wrapped expression.

        Returns self.  An element without a wrapped expression only has its
        own flag cleared.
        """
        self.skipWhitespace = False
        # Copy only when there is something to copy: calling copy() before
        # the None check raised AttributeError for an empty element.
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register other as ignorable here and on the wrapped expression.

        A Suppress instance that is already registered is not added again.
        Returns self.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseElementEnhance, self).ignore( other )
        if self.expr is not None:
            self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and the wrapped expression; returns self."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise RecursiveGrammarException if self is already in parseElementList,
        otherwise continue the check in the wrapped expression."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression, then run the recursion check on self."""
        # validateTrace is only read, never modified, so the shared default is safe
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """Return the inherited string form, or a cached "ClassName:(expr)" fallback.

        The fallback is only generated when a wrapped expression exists.
        """
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            # fall back to a generated description when the inherited
            # __str__ cannot produce one
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
2656
2657
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.

    FollowedBy does *not* advance the parsing position within the input
    string; it only verifies that the given expression matches at the current
    position.  It always returns a null token list.
    """

    def __init__( self, expr ):
        """Wrap expr; a lookahead may always return empty."""
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Probe the wrapped expression at loc and return (loc, []) unchanged.

        Any exception raised by the probe propagates to the caller.
        """
        lookahead = self.expr
        lookahead.tryParse(instring, loc)
        return loc, []
2670
2671
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.

    NotAny does *not* advance the parsing position within the input string;
    it only verifies that the given expression does *not* match at the
    current position.  NotAny also does *not* skip over leading whitespace.
    It always returns a null token list.  May be constructed using the '~'
    operator.
    """

    def __init__( self, expr ):
        """Wrap expr and prepare the error message used when it does match."""
        super(NotAny, self).__init__(expr)
        # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (loc, []) if the wrapped expression fails to match at loc.

        If it does match, the shared self.myException is updated with the
        location and input string and raised.
        """
        try:
            self.expr.tryParse(instring, loc)
        except (ParseException, IndexError):
            # the unwanted expression is absent: the lookahead succeeds
            return loc, []

        failure = self.myException
        failure.pstr = instring
        failure.loc = loc
        raise failure

    def __str__( self ):
        """Return self.name if set, else a cached "~{expr}" description."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"
        return self.strRepr
2707
2708
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""

    def __init__( self, expr ):
        """Wrap expr; zero repetitions are allowed, so the element may return empty."""
        super(ZeroOrMore, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse the wrapped expression repeatedly until it stops matching.

        Returns (new location, accumulated tokens).  If not even the first
        attempt matches, loc is returned unchanged with an empty list.
        """
        tokens = []
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
            skipIgnorables = len(self.ignoreExprs) > 0
            while True:
                nextLoc = self._skipIgnorables(instring, loc) if skipIgnorables else loc
                loc, more = self.expr._parse(instring, nextLoc, doActions)
                # keep results that carry tokens or named entries
                if more or more.keys():
                    tokens += more
        except (ParseException, IndexError):
            # the first failed repetition ends the loop
            pass

        return loc, tokens

    def __str__( self ):
        """Return self.name if set, else a cached "[expr]..." description."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]..."
        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """Set the results name via the base class and mark the returned element as saveAsList."""
        named = super(ZeroOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
2746
2747
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""
    def parseImpl( self, instring, loc, doActions=True ):
        """Match the wrapped expression once (a failure here propagates to the
           caller), then keep matching until a repetition fails.
           Returns (loc, tokens)."""
        # the mandatory first match is outside the try block on purpose
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        try:
            skipIgnorables = len(self.ignoreExprs) > 0
            while True:
                nextLoc = loc
                if skipIgnorables:
                    nextLoc = self._skipIgnorables( instring, loc )
                loc, matched = self.expr._parse( instring, nextLoc, doActions )
                # accumulate when there are tokens or named results to merge
                if matched or matched.keys():
                    tokens += matched
        except (ParseException,IndexError):
            pass

        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        # build the default representation lazily and cache it
        if self.strRepr is None:
            self.strRepr = "{" + _ustr(self.expr) + "}..."
        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """Same as the inherited setResultsName, but the returned element
           is additionally flagged with saveAsList = True."""
        named = super(OneOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
2781
2782 class _NullToken(object):
2783 def __bool__(self):
2784 return False
2785 __nonzero__ = __bool__
2786 def __str__(self):
2787 return ""
2788
2789 _optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
       A default return value can also be specified; it is returned as the
       single token when the optional expression is not found.
    """
    def __init__( self, exprs, default=_optionalNotMatched ):
        super(Optional,self).__init__( exprs, savelist=False )
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Try the wrapped expression; on failure return the default value
           (if one was given) or an empty token list, without advancing loc."""
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException,IndexError):
            fallback = self.defaultValue
            if fallback is _optionalNotMatched:
                # no default was supplied
                tokens = []
            elif self.expr.resultsName:
                # expose the default under the wrapped expression's results name too
                tokens = ParseResults([ fallback ])
                tokens[self.expr.resultsName] = fallback
            else:
                tokens = [ fallback ]
        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        # build the default representation lazily and cache it
        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]"
        return self.strRepr
2822
2823
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If include is set to true, the matched expression is also parsed (the skipped text
       and matched expression are returned as a 2-element list).  The ignore
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.  If failOn is given (an expression, or a string
       that is converted to a Literal), the skip is abandoned with a ParseException
       as soon as failOn matches at a candidate position.
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        if failOn is not None and isinstance(failOn, basestring):
            self.failOn = Literal(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Advance one character at a time from loc until the target expression
           matches.  Returns (loc, [skippedText]) or, when includeMatch is set
           and the target produced tokens, (loc, [ParseResults]) holding the
           skipped text followed by the target's tokens.  Raises
           self.myException if the end of the string is passed without a match."""
        startLoc = loc
        instrlen = len(instring)
        expr = self.expr
        # set only when failOn matched, so the handler below re-raises instead of advancing
        failParse = False
        while loc <= instrlen:
            try:
                if self.failOn:
                    try:
                        self.failOn.tryParse(instring, loc)
                    except ParseBaseException:
                        pass
                    else:
                        failParse = True
                        raise ParseException(instring, loc, "Found expression " + str(self.failOn))
                    failParse = False
                if self.ignoreExpr is not None:
                    # jump over every consecutive ignorable match at this position
                    # (a debugging print that wrote to stdout here has been removed)
                    while 1:
                        try:
                            loc = self.ignoreExpr.tryParse(instring,loc)
                        except ParseBaseException:
                            break
                # probe for the target without running parse actions
                expr._parse( instring, loc, doActions=False, callPreParse=False )
                skipText = instring[startLoc:loc]
                if self.includeMatch:
                    loc,mat = expr._parse(instring,loc,doActions,callPreParse=False)
                    if mat:
                        skipRes = ParseResults( skipText )
                        skipRes += mat
                        return loc, [ skipRes ]
                    else:
                        return loc, [ skipText ]
                else:
                    return loc, [ skipText ]
            except (ParseException,IndexError):
                if failParse:
                    raise
                else:
                    loc += 1
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
2889
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the Forward variable using the '<<' operator.

       Note: take care when assigning to Forward not to overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the Forward::
          fwdExpr << (a | b | c)
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Install other (a string is converted to a Literal) as the wrapped
           expression and copy its parsing attributes onto this Forward.
           Returns None."""
        if isinstance( other, basestring ):
            other = Literal(other)
        self.expr = other
        self.strRepr = None
        self.mayIndexError = other.mayIndexError
        self.mayReturnEmpty = other.mayReturnEmpty
        # setWhitespaceChars must run before skipWhitespace is copied below
        self.setWhitespaceChars( other.whiteChars )
        self.skipWhitespace = other.skipWhitespace
        self.saveAsList = other.saveAsList
        self.ignoreExprs.extend(other.ignoreExprs)
        return None

    def leaveWhitespace( self ):
        # only this element's flag is changed; the wrapped expression is left alone
        self.skipWhitespace = False
        return self

    def streamline( self ):
        # the flag is set before descending into the wrapped expression
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        # the default list is never mutated: a fresh list is built for the nested call
        if self not in validateTrace:
            trace = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(trace)
        self.checkRecursion([])

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        # Temporarily switch class so that a nested str() of this same object
        # is answered by _ForwardNoRecurse.__str__ instead of recursing.
        self._revertClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is None:
                retString = "None"
            else:
                retString = _ustr(self.expr)
        finally:
            self.__class__ = self._revertClass
        return self.__class__.__name__ + ": " + retString

    def copy(self):
        if self.expr is not None:
            return super(Forward,self).copy()
        # still undefined: hand back a new Forward that wraps this one
        duplicate = Forward()
        duplicate << self
        return duplicate
2961
class _ForwardNoRecurse(Forward):
    """Stand-in class that Forward.__str__ switches to while rendering its
       contents; its string form is a fixed "..." placeholder."""
    def __str__( self ):
        return "..."
2965
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseExpression, for converting parsed results."""
    def __init__( self, expr, savelist=False ):
        # savelist is accepted but not forwarded to the base class
        super(TokenConverter,self).__init__( expr )
        self.saveAsList = False
2971
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens.
       Deprecated: constructing one issues a DeprecationWarning pointing to
       the upcaseTokens parse action."""
    def __init__(self, *args):
        super(Upcase,self).__init__(*args)
        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
                      DeprecationWarning,stacklevel=2)

    def postParse( self, instring, loc, tokenlist ):
        # return a plain list holding the upper-cased form of every token
        return [ string.upper(tok) for tok in tokenlist ]
2981
2982
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
       By default, the matching patterns must also be contiguous in the input string;
       this can be disabled by specifying 'adjacent=False' in the constructor.
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine,self).__init__( expr )
        # suppress whitespace-stripping in contained parse expressions,
        # but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString

    def ignore( self, other ):
        # an adjacent Combine uses ParserElement.ignore directly,
        # bypassing the inherited override
        if self.adjacent:
            ParserElement.ignore(self, other)
        else:
            super( Combine, self).ignore( other )
        return self

    def postParse( self, instring, loc, tokenlist ):
        """Replace the matched tokens with one joined string; the copy of
           tokenlist keeps everything except its list items."""
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([ joined ], modal=self.modalResults)

        if self.resultsName and len(combined.keys())>0:
            return [ combined ]
        return combined
3013
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning
       tokens of ZeroOrMore and OneOrMore expressions."""
    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        # wrap the matched tokens so they appear as one nested item
        grouped = [ tokenlist ]
        return grouped
3022
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Each element can also be referenced using the first token in the expression as its key.
       Useful for tabular report scraping when the first column can be used as a item key.
    """
    def __init__( self, exprs ):
        super(Dict,self).__init__( exprs )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Register each non-empty item of tokenlist under its first token as key.

           The stored value is "" for a one-token item, the second token for a
           two-token item whose second token is not a ParseResults, and otherwise
           the item minus its key (unwrapped when exactly one unnamed value remains).
        """
        for i,tok in enumerate(tokenlist):
            tokLen = len(tok)
            if tokLen == 0:
                continue
            ikey = tok[0]
            if isinstance(ikey,int):
                # integer keys are stored under their stripped string form
                ikey = _ustr(tok[0]).strip()
            if tokLen == 1:
                value = ""
            elif tokLen == 2 and not isinstance(tok[1],ParseResults):
                value = tok[1]
            else:
                remainder = tok.copy()
                del remainder[0]
                if len(remainder) != 1 or (isinstance(remainder,ParseResults) and remainder.keys()):
                    value = remainder
                else:
                    value = remainder[0]
            tokenlist[ikey] = _ParseResultsWithOffset(value,i)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist
3055
3056
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""
    def postParse( self, instring, loc, tokenlist ):
        # discard whatever was matched
        nothing = []
        return nothing

    def suppress( self ):
        # already a Suppress - return this same object
        return self
3064
3065
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""
    def __init__(self, methodCall):
        self.callable = ParserElement._normalizeParseActionArgs(methodCall)
        self.called = False
    def __call__(self,s,l,t):
        """Run the wrapped action on the first call; raise ParseException on
           every later call until reset() is invoked."""
        if self.called:
            raise ParseException(s,l,"")
        results = self.callable(s,l,t)
        # only marked as called once the action has returned without raising
        self.called = True
        return results
    def reset(self):
        """Allow the wrapped action to run once more."""
        self.called = False
3079
def traceParseAction(f):
    """Decorator for debugging parse actions.

       The returned wrapper writes an '>>entering' line to sys.stderr before
       calling the parse action and a '<<leaving' line afterwards, showing
       either the return value or the exception (which is re-raised)."""
    f = ParserElement._normalizeParseActionArgs(f)
    def z(*paArgs):
        # Fall back gracefully when the normalized action has no function
        # name, the same case the __name__ copy below already allows for.
        thisFunc = getattr(f, "func_name", None) or getattr(f, "__name__", repr(f))
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            # an extra leading argument is present - prefix its class name
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception:
            # sys.exc_info() is valid syntax on both Python 2 and Python 3
            exc = sys.exc_info()[1]
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
3101
3102 #
3103 # global helpers
3104 #
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, the list elements and delimiters can have intervening whitespace, and
       comments, but this can be overridden by passing 'combine=True' in the constructor.
       If combine is set to True, the matching tokens are returned as a single token
       string, with the delimiters included; otherwise, the matching tokens are returned
       as a list of tokens, with the delimiters suppressed.
    """
    exprName = _ustr(expr)
    dlName = exprName+" ["+_ustr(delim)+" "+exprName+"]..."
    if not combine:
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    else:
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    return listExpr.setName(dlName)
3118
def countedArray( expr ):
    """Helper to define a counted list of expressions.
       This helper defines a pattern of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
    """
    arrayExpr = Forward()
    def countFieldParseAction(s,l,t):
        # redefine the array part now that the element count is known
        n = int(t[0])
        if n:
            body = Group(And([expr]*n))
        else:
            body = Group(empty)
        arrayExpr << body
        # returning an empty list drops the count token from the results
        return []
    countField = Word(nums).setName("arrayLen")
    countField = countField.setParseAction(countFieldParseAction, callDuringTry=True)
    return ( countField + arrayExpr )
3132
3133 def _flatten(L):
3134 if type(L) is not list: return [L]
3135 if L == []: return L
3136 return _flatten(L[0]) + _flatten(L[1:])
3137
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because this matches a
       previous literal, will also match the leading "1:1" in "1:10".
       If this is not desired, use matchPreviousExpr.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    def copyTokenToRepeater(s,l,t):
        # re-point the repeater at literal(s) equal to what expr just matched
        if not t:
            rep << Empty()
            return
        if len(t) == 1:
            rep << t[0]
            return
        # several (possibly nested) tokens: flatten and require them in sequence
        flatTokens = _flatten(t.asList())
        rep << And( [ Literal(tok) for tok in flatTokens ] )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
3163
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because this matches by
       expressions, will *not* match the leading "1:1" in "1:10";
       the expressions are evaluated first, and then compared, so
       "1" is compared with "10".
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    # the repeater parses with its own copy of the original expression
    rep << expr.copy()
    def copyTokenToRepeater(s,l,t):
        expected = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            # reject the repeat unless it produced the same flattened tokens
            found = _flatten(t.asList())
            if found != expected:
                raise ParseException("",0,"")
        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
3189
def _escapeRegexRangeChars(s):
    """Prefix each of the characters \\ ^ - ] in s with _bslash and spell
       newline and tab as the two-character sequences \\n and \\t; the result
       is passed through _ustr."""
    for special in r"\^-]":
        s = s.replace(special, _bslash+special)
    escaped = s.replace("\n",r"\n").replace("\t",r"\t")
    return _ustr(escaped)
3197
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a MatchFirst for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a MatchFirst object (if caseless=True, or
          if creating a Regex raises an exception)

       An argument that is neither a string nor a list/tuple triggers a
       SyntaxWarning and is treated as an empty set of alternatives.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        symbols = list(strs[:])
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # keep going with no alternatives instead of failing below with a
        # NameError on the unbound 'symbols'
        symbols = []

    # Remove duplicates and move any symbol that is masked by an earlier,
    # shorter prefix in front of that prefix so the longer one is tried first.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing changed for position i - move on to the next symbol
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: use a character class
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            # 'except Exception' lets KeyboardInterrupt/SystemExit propagate
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
3256
def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
       for the key and value.  Takes care of defining the Dict, ZeroOrMore, and Group tokens
       in the proper order.  The key pattern can include delimiting markers or punctuation,
       as long as they are suppressed, thereby leaving the significant key text.  The value
       pattern can include named results, so that the Dict results can include named token
       fields.
    """
    entry = Group( key + value )
    entries = ZeroOrMore( entry )
    return Dict( entries )
3266
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
       revert separate tokens with intervening whitespace back to the original matching
       input text. Simpler to use than the parse action keepOriginalText, and does not
       require the inspect module to chase up the call stack.  By default, returns a
       string containing the original parsed text.

       If the optional asString argument is passed as False, then the return value is a
       ParseResults containing any results names that were originally matched, and a
       single token containing the original matched text from the input string.  So if
       the expression passed to originalTextFor contains expressions with defined
       results names, you must set asString to False if you want to preserve those
       results name values."""
    # an Empty whose parse action returns the current location
    locMarker = Empty().setParseAction(lambda s,loc,t: loc)
    startMarker = locMarker("_original_start")
    endMarker = locMarker("_original_end")
    matchExpr = startMarker + expr + endMarker

    def textOnly(s,l,t):
        # the result is just the slice of input between the two markers
        return s[t._original_start:t._original_end]

    def textKeepingNames(s,l,t):
        # replace the token list in place, then drop the two marker names
        del t[:]
        t.insert(0, s[t._original_start:t._original_end])
        del t["_original_start"]
        del t["_original_end"]

    if asString:
        extractText = textOnly
    else:
        extractText = textKeepingNames
    matchExpr.setParseAction(extractText)
    return matchExpr
3293
# Ready-made, named instances of the positional expressions, provided as
# convenience constants.
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")
3300
# Grammar used by srange() to read a regex-style '[...]' character set.

# a backslash followed by one punctuation character; yields that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# despite the name, both the backslash and ']' are left out
_printables_less_backslash = "".join([ ch for ch in printables if ch not in r"\]" ])
# backslash + "0x" + hex digits; yields the character with that code
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
# backslash + "0" + octal digits; yields the character with that code
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
_singleChar = ( _escapedPunc
              | _escapedHexChar
              | _escapedOctChar
              | Word(_printables_less_backslash,exact=1) )
# two single characters separated by a dash, grouped as a pair
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = ( Literal("[")
                 + Optional("^").setResultsName("negate")
                 + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body")
                 + "]" )

# expand a grouped (first,last) pair into every character in between; other parts pass through
_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
3310
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
       If the input cannot be parsed or expanded, an empty string is returned.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # best effort: any ordinary failure yields "", while
        # KeyboardInterrupt and SystemExit are allowed to propagate
        return ""
3331
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a specific
       column in the input text.  The returned parse action raises ParseException
       when the match location is not at column n.
    """
    def verifyCol(strg,locn,toks):
        actual = col(locn,strg)
        if actual == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
3340
def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.  Especially
       useful when used with transformString().
    """
    def _replFunc(*args):
        # whatever the arguments, answer with the fixed replacement value
        replacement = [replStr]
        return replacement
    return _replFunc
3348
def removeQuotes(s,l,t):
    """Helper parse action for removing quotation marks from parsed quoted strings.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )
    """
    # drop the first and last character of the first token
    quoted = t[0]
    return quoted[1:-1]
3355
def upcaseTokens(s,l,t):
    """Helper parse action to convert tokens to upper case."""
    # each token goes through _ustr first, then is upper-cased
    return [ _ustr(tok).upper() for tok in t ]
3359
def downcaseTokens(s,l,t):
    """Helper parse action to convert tokens to lower case."""
    # each token goes through _ustr first, then is lower-cased
    return [ _ustr(tok).lower() for tok in t ]
3363
def keepOriginalText(s,startLoc,t):
    """Helper parse action to preserve original parsed text,
       overriding any nested parse actions."""
    try:
        endloc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    # swap the tokens, in place, for the raw slice of the input string
    originalText = s[startLoc:endloc]
    del t[:]
    t += ParseResults(originalText)
    return t
3374
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens."""
    import inspect
    fstack = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers) for correct calling routine;
        # the first two frames (this function and its direct caller) are skipped
        for frameRecord in fstack[2:]:
            if frameRecord[3] != "_parseNoCache":
                continue
            return frameRecord[0].f_locals["loc"]
        # no frame named _parseNoCache was found above us
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the reference to the frame records before leaving
        del fstack
3390
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

       tagStr is a tag name (turned into a Keyword, caseless unless xml is true) or an
       existing expression with a name.  Returns the tuple (openTag, closeTag).
    """
    if isinstance(tagStr,basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    if (xml):
        # XML: every attribute has a double-quoted value
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        attribute = Group( tagAttrName + Suppress("=") + tagAttrValue )
    else:
        # HTML: attribute names are lower-cased, values may be unquoted or absent
        printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        attribute = Group( tagAttrName.setParseAction(downcaseTokens) + \
                Optional( Suppress("=") + tagAttrValue ) )
    # optional '/' before '>', reported under the name "empty" as a boolean
    selfClosing = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    openTag = Suppress("<") + tagStr + Dict(ZeroOrMore(attribute)) + selfClosing + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # e.g. a resname of "ns:tag" gives the suffix "NsTag"
    nameSuffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+nameSuffix).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+nameSuffix).setName("</%s>" % tagStr)

    return openTag, closeTag
3418
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name"""
    asXml = False
    return _makeTags( tagStr, asXml )
3422
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given a tag name"""
    asXml = True
    return _makeTags( tagStr, asXml )
3426
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       <TD> or <DIV>.

       Call withAttribute with a series of attribute names and values. Specify the list
       of filter attributes names and values as:
        - keyword arguments, as in (class="Customer",align="right"), or
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second form.  Attribute
       names are matched insensitive to upper/lower case.

       To verify that the attribute exists, but without specifying a value, pass
       withAttribute.ANY_VALUE as the value.
    """
    # positional name-value pairs take precedence; keywords are used only without them
    if args:
        pairs = args[:]
    else:
        pairs = attrDict.items()
    attrs = [ (name,value) for name,value in pairs ]
    def pa(s,l,tokens):
        for attrName,attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue == withAttribute.ANY_VALUE:
                # presence alone is enough for this attribute
                continue
            if tokens[attrName] != attrValue:
                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                            (attrName, tokens[attrName], attrValue))
    return pa
# unique marker meaning "the attribute must be present, with any value"
withAttribute.ANY_VALUE = object()
3457
# Associativity markers for operatorPrecedence(); each one is a distinct object.
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
3461
def operatorPrecedence( baseExpr, opList ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic element for the nested
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants opAssoc.RIGHT and opAssoc.LEFT.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)

       Raises ValueError for a numTerms outside 1-3, for a ternary opExpr that
       is not a pair, or for an unrecognized associativity.
    """
    ret = Forward()
    # innermost level: an operand or a parenthesized full expression
    lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    for operDef in opList:
        # pad with None so the parse action member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        thisExpr = Forward()
        # each level is a FollowedBy lookahead plus the Group doing the real match
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is None:
                    # no operator expression: terms simply follow one another
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
                else:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is None:
                    # no operator expression: terms simply follow one another
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # this level falls back to the previous level when no operator is present
        thisExpr << ( matchExpr | lastExpr )
        lastExpr = thisExpr
    ret << lastExpr
    return ret
3532
# Ready-made expressions for quoted strings.  Inside the quotes the patterns accept
# a doubled quote character, a \x hex escape, or a backslash followed by any one
# character; unescaped newlines and carriage returns are not accepted.
dblQuotedString = Regex(
    r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"'
    ).setName("string enclosed in double quotes")
sglQuotedString = Regex(
    r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'"
    ).setName("string enclosed in single quotes")
quotedString = Regex(
    r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')'''
    ).setName("quotedString using single or double quotes")
# a quoted string immediately preceded by 'u', combined into a single token
unicodeString = Combine(_L('u') + quotedString.copy())
3537
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """Build an expression that matches nested lists enclosed in opening and
       closing delimiters ("(" and ")" by default).

       Parameters:
        - opener - opening delimiter of a nested list (default="("); may
          also be a pyparsing expression
        - closer - closing delimiter of a nested list (default=")"); may
          also be a pyparsing expression
        - content - expression for the items inside the nested lists
          (default=None)
        - ignoreExpr - expression whose matches may contain opener/closer
          text that must not count as delimiters (default=quotedString)

       When no content expression is supplied, one is derived from the
       delimiters so that everything between them is captured as separate
       whitespace-delimited values; this requires both delimiters to be
       plain strings.

       Typical ignoreExpr values are quotedString or a comment expression;
       several can be combined with an Or or MatchFirst.  Pass None when
       nothing should be ignored.

       Raises ValueError if opener equals closer, or if content is None and
       either delimiter is not a string.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # A default content expression can only be derived from plain strings.
        if not (isinstance(opener, basestring) and isinstance(closer, basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        stripToken = lambda t: t[0].strip()
        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        singleCharDelims = len(opener) == 1 and len(closer) == 1

        if singleCharDelims and ignoreExpr is not None:
            # One-character delimiters can simply be excluded by CharsNotIn.
            content = Combine(OneOrMore(
                ~ignoreExpr +
                CharsNotIn(opener + closer + whiteChars, exact=1)
            )).setParseAction(stripToken)
        elif singleCharDelims:
            content = (empty +
                       CharsNotIn(opener + closer + whiteChars
                                  ).setParseAction(stripToken))
        elif ignoreExpr is not None:
            # Multi-character delimiters need explicit negative lookaheads.
            content = Combine(OneOrMore(
                ~ignoreExpr +
                ~Literal(opener) + ~Literal(closer) +
                CharsNotIn(whiteChars, exact=1)
            )).setParseAction(stripToken)
        else:
            content = Combine(OneOrMore(
                ~Literal(opener) + ~Literal(closer) +
                CharsNotIn(whiteChars, exact=1)
            )).setParseAction(stripToken)

    # The result refers to itself so that lists can nest to any depth.
    ret = Forward()
    if ignoreExpr is None:
        items = ret | content
    else:
        items = ignoreExpr | ret | content
    ret << Group( Suppress(opener) + ZeroOrMore( items ) + Suppress(closer) )
    return ret
3589
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Build an expression for a space-delimited indentation block, such as
       the block statements of Python source code.

       Parameters:
        - blockStatementExpr - expression for the statement that is repeated
          inside the block
        - indentStack - list supplied by the caller that tracks the active
          indentation columns (all indentedBlock expressions of one grammar
          should share the same list)
        - indent - True if the block must be indented beyond the current
          level; False for a block of left-most statements (default=True)

       A valid block contains at least one blockStatementExpr.  Note that
       blockStatementExpr is modified in place: it is told to ignore a
       backslash followed by a line end.
    """
    def checkPeerIndent(instring, loc, toks):
        # Nothing to verify once the end of the input has been reached.
        if loc < len(instring):
            column = col(loc, instring)
            expected = indentStack[-1]
            if column > expected:
                raise ParseFatalException(instring, loc, "illegal nesting")
            if column != expected:
                raise ParseException(instring, loc, "not a peer entry")

    def checkSubIndent(instring, loc, toks):
        column = col(loc, instring)
        if column <= indentStack[-1]:
            raise ParseException(instring, loc, "not a subentry")
        # A deeper column opens a new indentation level.
        indentStack.append( column )

    def checkUnindent(instring, loc, toks):
        if loc >= len(instring):
            return
        column = col(loc, instring)
        if not indentStack or column >= indentStack[-1] or column > indentStack[-2]:
            raise ParseException(instring, loc, "not an unindent")
        indentStack.pop()

    # Zero-width markers whose parse actions carry out the column checks.
    lineBreaks = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    indentMarker = Empty() + Empty().setParseAction(checkSubIndent)
    peerMarker = Empty().setParseAction(checkPeerIndent)
    dedentMarker = Empty().setParseAction(checkUnindent)

    statements = OneOrMore( peerMarker + Group(blockStatementExpr) + Optional(lineBreaks) )
    if indent:
        body = (Optional(lineBreaks) +
                FollowedBy(blockStatementExpr) +
                indentMarker + statements + dedentMarker)
    else:
        body = Optional(lineBreaks) + statements
    smExpr = Group( body )

    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr
3641
# Character sets for the upper (8-bit) range, expanded by srange().
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# Opening/closing tag expressions for any tag name made of letters followed
# by letters, digits, "_" or ":".
anyOpenTag, anyCloseTag = makeHTMLTags(Word(alphas, alphanums+"_:"))

# "&name;" for a handful of entity names; the name is stored under the
# results name "entity".
commonHTMLEntity = Combine(
    _L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") + ";"
).streamline()
# Replacement character for each entity name: gt, lt, amp, nbsp, quot.
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))

def replaceHTMLEntity(t):
    """Return the replacement character for the entity named by t.entity.

       Parameters:
        - t - object exposing an "entity" attribute (an entity name)

       Returns the mapped character, or None when the name is not in the
       entity map.  A plain dictionary lookup is used instead of the
       "cond and a or b" idiom, which would return None for any mapped
       value that happens to be falsy.
    """
    return _htmlEntityMap.get(t.entity)
3649
# Ready-made comment matchers: these patterns are needed often and are easy
# to get subtly wrong, so they are provided here.
cStyleComment = Regex(
    r"/\*(?:[^*]*\*+)+?/"
).setName("C style comment")

htmlComment = Regex(r"<!--[\s\S]*?-->")
restOfLine = Regex(r".*").leaveWhitespace()
dblSlashComment = Regex(
    r"\/\/(\\\n|.)*"
).setName("// comment")
cppStyleComment = Regex(
    r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))"
).setName("C++ style comment")

# The C++ matcher is reused as-is for Java comments.
javaStyleComment = cppStyleComment
pythonStyleComment = Regex(
    r"#.*"
).setName("Python style comment")

# Every printable character except the comma.
_noncomma = printables.replace(",", "")
# One list item: runs of non-comma characters, optionally joined by blanks or
# tabs as long as neither a comma nor the end of the line follows them.
_commasepitem = Combine(
    OneOrMore(
        Word(_noncomma) +
        Optional( Word(" \t") + ~Literal(",") + ~LineEnd() )
    )
).streamline().setName("commaItem")
# Comma-delimited list whose items may be quoted strings; an empty item
# yields "".
commaSeparatedList = delimitedList(
    Optional( quotedString | _commasepitem, default="" )
).setName("commaSeparatedList")
3665
3666
if __name__ == "__main__":
    # Self-test: parse a handful of simple SQL SELECT statements, both valid
    # and malformed, and print either the parsed tokens or the error position.

    def test( teststring ):
        """Parse teststring with simpleSQL and print the outcome.

           On success the token list, the named "columns" and "tables"
           results and an XML dump are printed; on a parse error the
           offending line is printed with a caret under the error column.
        """
        try:
            tokens = simpleSQL.parseString( teststring )
            tokenlist = tokens.asList()
            print (teststring + "->" + str(tokenlist))
            print ("tokens = " + str(tokens))
            print ("tokens.columns = " + str(tokens.columns))
            print ("tokens.tables = " + str(tokens.tables))
            print (tokens.asXML("SQL",True))
        except ParseBaseException as err:
            # "as" is accepted by Python 2.6+ and Python 3; the older
            # "except X, err" spelling is a syntax error on Python 3.
            print (teststring + "->")
            print (err.line)
            print (" "*(err.column-1) + "^")
            print (err)
        # Emit a blank separator line.  A bare print() would print "()" under
        # the Python 2 print statement, so an empty string is printed instead.
        print ("")

    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    ident = Word( alphas, alphanums + "_$" )
    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList = Group( delimitedList( tableName ) )#.setName("tables")
    simpleSQL = ( selectToken + \
                  ( '*' | columnNameList ).setResultsName( "columns" ) + \
                  fromToken + \
                  tableNameList.setResultsName( "tables" ) )

    # Same statements, in the same order, as the original sequence of calls.
    for statement in (
        "SELECT * from XYZZY, ABC",
        "select * from SYS.XYZZY",
        "Select A from Sys.dual",
        "Select AA,BB,CC from Sys.dual",
        "Select A, B, C from Sys.dual",
        "Select A, B, C from Sys.dual",
        "Xelect A, B, C from Sys.dual",
        "Select A, B, C frox Sys.dual",
        "Select",
        "Select ^^^ frox Sys.dual",
        "Select A, B, C from Sys.dual, Table2 ",
    ):
        test( statement )
General Comments 0
You need to be logged in to leave comments. Login now