##// END OF EJS Templates
pyparsing 1.5.2 added to externals....
Brian Granger -
Show More
This diff has been collapsed as it changes many lines, (3707 lines changed) Show them Hide them
@@ -0,0 +1,3707 b''
1 # module pyparsing.py
2 #
3 # Copyright (c) 2003-2009 Paul T. McGuire
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
12 #
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 #
24 #from __future__ import generators
25
26 __doc__ = \
27 """
28 pyparsing module - Classes and methods to define and execute parsing grammars
29
30 The pyparsing module is an alternative approach to creating and executing simple grammars,
31 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
32 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
33 provides a library of classes that you use to construct the grammar directly in Python.
34
35 Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
36
37 from pyparsing import Word, alphas
38
39 # define grammar of a greeting
40 greet = Word( alphas ) + "," + Word( alphas ) + "!"
41
42 hello = "Hello, World!"
43 print hello, "->", greet.parseString( hello )
44
45 The program outputs the following::
46
47 Hello, World! -> ['Hello', ',', 'World', '!']
48
49 The Python representation of the grammar is quite readable, owing to the self-explanatory
50 class names, and the use of '+', '|' and '^' operators.
51
52 The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
53 object with named attributes.
54
55 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
56 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
57 - quoted strings
58 - embedded comments
59 """
60
61 __version__ = "1.5.2"
62 __versionTime__ = "17 February 2009 19:45"
63 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
64
65 import string
66 from weakref import ref as wkref
67 import copy
68 import sys
69 import warnings
70 import re
71 import sre_constants
72 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
73
74 __all__ = [
75 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
76 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
77 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
78 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
79 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
80 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
81 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
82 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
83 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
84 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums',
85 'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
86 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
87 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
88 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
89 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
90 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
91 'indentedBlock', 'originalTextFor',
92 ]
93
94
95 """
96 Detect if we are running version 3.X and make appropriate changes
97 Robert A. Clark
98 """
# Record once whether the interpreter is Python 3.X, then pick the matching
# "largest int" constant and provide the basestring alias that 3.X lacks.
_PY3K = sys.version_info[0] > 2
if _PY3K:
    _MAX_INT = sys.maxsize
    basestring = str
else:
    _MAX_INT = sys.maxint
106
if _PY3K:
    # Python 3 strings are already unicode; plain str() and chr() suffice.
    _ustr = str
    unichr = chr
else:
    def _ustr(obj):
        """Unicode-tolerant stand-in for str(obj).

           Unicode objects are returned untouched.  Anything else is passed to
           str(); if that raises UnicodeEncodeError the value is converted with
           unicode() instead and the unicode object is returned.
        """
        if isinstance(obj, unicode):
            return obj

        try:
            # When this succeeds _ustr(obj) behaves exactly like str(obj), so
            # existing callers are unaffected.
            converted = str(obj)
        except UnicodeEncodeError:
            # Return a unicode object rather than picking an encoding; the
            # alternatives (encoding with 'backslashreplace' or 'replace'
            # error handlers) were considered and left unused.
            converted = unicode(obj)
        return converted
137
if _PY3K:
    _str2dict = set
else:
    def _str2dict(strg):
        """Return a dict mapping every character of strg to 0 (used for fast membership tests)."""
        return dict.fromkeys(strg, 0)
143
def _xml_escape(data):
    """Escape &, <, >, ", ', etc. in a string of data."""

    # The ampersand pair comes first so that the '&' characters introduced by
    # the later entity references are not escaped a second time.
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    for plain, entity in replacements:
        data = data.replace(plain, entity)
    return data
153
class _Constants(object):
    """Empty class with no behaviour of its own; instances act as plain attribute holders."""
156
# Character-set constants exported by the module.
if _PY3K:
    alphas = string.ascii_lowercase + string.ascii_uppercase
else:
    alphas = string.lowercase + string.uppercase
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
# backslash, built from its character code (92) instead of an escaped literal
_bslash = chr(92)
# every printable character that is not whitespace
printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )
166
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        """Create the exception.

           - pstr = the string being parsed; when msg is omitted this argument
             is taken to be the message instead and the stored string is ""
           - loc = character offset of the failure within pstr
           - msg = description of the failure
           - elem = the parser element that raised the exception
        """
        self.loc = loc
        if msg is None:
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        self.parserElement = elem

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
              (also available as "column")
            - line - returns the line containing the exception text
        """
        if( aname == "lineno" ):
            return lineno( self.loc, self.pstr )
        elif( aname in ("col", "column") ):
            return col( self.loc, self.pstr )
        elif( aname == "line" ):
            return line( self.loc, self.pstr )
        else:
            raise AttributeError(aname)

    def __str__( self ):
        return "%s (at char %d), (line:%d, col:%d)" % \
                ( self.msg, self.loc, self.lineno, self.column )

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = "".join( [line_str[:line_column],
                                markerString, line_str[line_column:]])
        return line_str.strip()

    def __dir__(self):
        # The names listed here must match what the class really provides:
        # the method is spelled markInputline (lower-case "l"), and "column"
        # is accepted by __getattr__ as an alias of "col".
        return "loc msg pstr parserElement lineno col column line " \
               "markInputline __str__ __repr__".split()
214
class ParseException(ParseBaseException):
    """Raised when a parse expression fails to match the input.

       Attributes available by name (inherited from ParseBaseException):
        - lineno - returns the line number of the exception text
        - col - returns the column number of the exception text
        - line - returns the line containing the exception text
    """
223
class ParseFatalException(ParseBaseException):
    """User-throwable exception, raised when inconsistent parse content
       is found; stops all parsing immediately."""
228
class ParseSyntaxException(ParseFatalException):
    """Same as ParseFatalException, but raised internally when an
       ErrorStop indicates that parsing must stop at once because an
       unbacktrackable syntax error has been found."""
    def __init__(self, pe):
        # Re-create the exception from the fields of the one that triggered it.
        fields = (pe.pstr, pe.loc, pe.msg, pe.parserElement)
        super(ParseSyntaxException, self).__init__(*fields)
236
237 #~ class ReparseException(ParseBaseException):
238 #~ """Experimental class - parse actions can raise this exception to cause
239 #~ pyparsing to reparse the input string:
240 #~ - with a modified input string, and/or
241 #~ - with a modified start location
242 #~ Set the values of the ReparseException in the constructor, and raise the
243 #~ exception in a parse action to cause pyparsing to use the new string/location.
244 #~ Setting the values as None causes no change to be made.
245 #~ """
246 #~ def __init_( self, newstring, restartLoc ):
247 #~ self.newParseText = newstring
248 #~ self.reparseLoc = restartLoc
249
class RecursiveGrammarException(Exception):
    """exception thrown by validate() if the grammar could be improperly recursive"""
    def __init__( self, parseElementList ):
        # sequence of parse elements that make up the suspect recursion
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # Wrap the trace in a 1-tuple: a bare tuple on the right of % would be
        # unpacked as separate format arguments and raise TypeError.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
257
class _ParseResultsWithOffset(object):
    """Pairs a value with an offset, stored as the 2-tuple (value, offset)."""
    def __init__(self,p1,p2):
        pair = (p1, p2)
        self.tup = pair

    def __getitem__(self,i):
        # index 0 is the value, index 1 the offset
        pair = self.tup
        return pair[i]

    def __repr__(self):
        return repr(self.tup)

    def setOffset(self,i):
        """Replace the stored offset with i, keeping the value."""
        value = self.tup[0]
        self.tup = (value, i)
267
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (len(results))
       - by list index (results[0], results[1], etc.)
       - by attribute (results.<resultsName>)
       """
    __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )

    def __new__(cls, toklist, name=None, asList=True, modal=True ):
        # Wrapping an existing ParseResults returns that same object; the
        # __doinit flag then tells __init__ not to reset its contents.
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist, name=None, asList=True, modal=True ):
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name:
            if not modal:
                # non-modal names accumulate every match instead of the last one
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if not toklist in (None,'',[]):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist

    def __getitem__( self, i ):
        # ints and slices index the token list; anything else is a results name
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v ):
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            # a weak reference avoids a parent <-> child reference cycle
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name in self.__tokdict:
                occurrences = self.__tokdict[name]
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return len( self.__toklist ) > 0
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( reversed(self.__toklist) )

    def keys( self ):
        """Returns all named result keys."""
        return self.__tokdict.keys()

    def pop( self, index=-1 ):
        """Removes and returns item at specified index (default=last).
           Will work with either numeric indices or dict-key indices."""
        ret = self[index]
        del self[index]
        return ret

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given defaultValue or None if no
           defaultValue is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """Inserts insStr into the token list at index and shifts the
           recorded offsets of named results located after that position."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name in self.__tokdict:
            occurrences = self.__tokdict[name]
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def items( self ):
        """Returns all named result keys and values as a list of tuples."""
        return [(k,self[k]) for k in self.__tokdict]

    def values( self ):
        """Returns all named result values."""
        return [ v[-1][0] for v in self.__tokdict.values() ]

    def __getattr__( self, name ):
        # Only reached for names that normal lookup did not find: a results
        # name yields its value, any other name yields "", and a slot name
        # yields None.
        if name not in self.__slots__:
            if name in self.__tokdict:
                if name not in self.__accumNames:
                    return self.__tokdict[name][-1][0]
                else:
                    return ParseResults([ v[0] for v in self.__tokdict[name] ])
            else:
                return ""
        return None

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        if other.__tokdict:
            offset = len(self.__toklist)

            def addoffset(a):
                # Negative offsets map to the current end of this list.  An
                # explicit branch is required: the "a<0 and offset or ..."
                # idiom falls through to a+offset when offset is 0.
                if a < 0:
                    return offset
                return a + offset

            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        del other
        return self

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        # nested results are rendered recursively, plain tokens with repr()
        parts = []
        for i in self.__toklist:
            if isinstance(i, ParseResults):
                parts.append( _ustr(i) )
            else:
                parts.append( repr(i) )
        return "[" + ", ".join(parts) + "]"

    def _asStringList( self, sep='' ):
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens.
           Nested ParseResults become nested lists; tokens are returned as
           stored, without conversion."""
        out = []
        for res in self.__toklist:
            if isinstance(res,ParseResults):
                out.append( res.asList() )
            else:
                out.append( res )
        return out

    def asDict( self ):
        """Returns the named parse results as dictionary."""
        return dict( self.items() )

    def copy( self ):
        """Returns a new copy of a ParseResults object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
                                                        for v in vlist ] )
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        worklist = self.__toklist
        for i,res in enumerate(worklist):
            # results name registered for this position, or None
            resTag = namedItems.get(i)
            if isinstance(res,ParseResults):
                out += [ res.asXML(resTag,
                                    namedItemsOnly and doctag is None,
                                    nextLevelIndent,
                                    formatted)]
            else:
                # individual token, see if there is a name for it
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # find the results name under which the object `sub` is stored
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               # list() is needed because Python 3 dict views cannot be indexed
               list(self.__tokdict.values())[0][0][1] in (0,-1)):
            return list(self.__tokdict.keys())[0]
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a ParseResults.
           Accepts an optional indent argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        out.append( indent+_ustr(self.asList()) )
        keys = self.items()
        keys.sort()
        for k,v in keys:
            if out:
                out.append('\n')
            out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
            if isinstance(v,ParseResults):
                if v.keys():
                    out.append( v.dump(indent,depth+1) )
                else:
                    out.append(_ustr(v))
            else:
                out.append(_ustr(v))
        return "".join(out)

    # add support for pickle protocol
    def __getstate__(self):
        # the weakly-referenced parent is resolved to the object itself (or None)
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        self.__tokdict, \
        par, \
        inAccumNames, \
        self.__name = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __dir__(self):
        # keys() is a view on Python 3 and cannot be added to a list directly
        return dir(super(ParseResults,self)) + list(self.keys())
618
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
   The first column is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # a location that sits on a newline character is reported as column 1
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # otherwise: distance from the previous newline (rfind gives -1 on line 1)
    previous_newline = strg.rfind("\n", 0, loc)
    return loc - previous_newline
630
def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
   The first line is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # one line, plus one more for every newline found before loc
    newlines_before = strg.count("\n", 0, loc)
    return 1 + newlines_before
642
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       The returned text does not include the terminating newline.
       """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() returns -1 when there is no further newline; index 0 is a valid
    # match (string starting with a newline), so compare against 0 inclusively.
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    else:
        return strg[lastCR+1:]
652
def _defaultStartDebugAction( instring, loc, expr ):
    """Default debug hook: print the expression about to be tried, its
       location and that location's (line,column)."""
    expr_text = _ustr(expr)
    loc_text = _ustr(loc)
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    print ("Match " + expr_text + " at loc " + loc_text + position)
655
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Default debug hook: print the expression and the tokens it produced."""
    expr_text = _ustr(expr)
    tokens_text = str(toks.asList())
    print ("Matched " + expr_text + " -> " + tokens_text)
658
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Default debug hook: print the exception passed in as exc."""
    exc_text = _ustr(exc)
    print ("Exception raised:" + exc_text)
661
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.
       Accepts any positional arguments and ignores them."""
    return None
665
666 class ParserElement(object):
667 """Abstract base level parser element class."""
668 DEFAULT_WHITE_CHARS = " \n\t\r"
669
def setDefaultWhitespaceChars( chars ):
    """Replace the class-wide default set of whitespace characters
       (ParserElement.DEFAULT_WHITE_CHARS) with chars.
    """
    ParserElement.DEFAULT_WHITE_CHARS = chars
# exposed as a static method: called on the class, takes no instance
setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
675
def __init__( self, savelist=False ):
    """Initialise the state shared by all parser elements."""
    # naming / results
    #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
    self.strRepr = None
    self.resultsName = None
    self.saveAsList = savelist
    self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
    self.errmsg = ""

    # actions
    self.parseAction = []
    self.failAction = None
    self.callDuringTry = False

    # whitespace and ignorable handling
    self.skipWhitespace = True
    self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    self.copyDefaultWhiteChars = True
    self.keepTabs = False
    self.ignoreExprs = []
    self.callPreparse = True # used to avoid redundant calls to preParse

    # parsing behaviour flags
    self.mayReturnEmpty = False # used when checking for left-recursion
    self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
    self.streamlined = False
    self.re = None

    # debugging
    self.debug = False
    self.debugActions = ( None, None, None ) #custom debug actions
698
def copy( self ):
    """Return a shallow copy of this ParserElement with its own parse-action
       and ignore-expression lists, so that different parse actions can be
       attached to the same parsing pattern."""
    duplicate = copy.copy( self )
    # give the duplicate independent lists, so later changes do not leak back
    duplicate.parseAction = self.parseAction[:]
    duplicate.ignoreExprs = self.ignoreExprs[:]
    if self.copyDefaultWhiteChars:
        # pick up the class default as it is *now*
        duplicate.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    return duplicate
708
def setName( self, name ):
    """Give this expression a name for use in debugging; the error message
       becomes "Expected <name>".  Returns self."""
    self.name = name
    message = "Expected " + self.name
    self.errmsg = message
    # keep an already-built exception object in step with the new message
    if hasattr(self,"exception"):
        self.exception.msg = message
    return self
716
def setResultsName( self, name, listAllMatches=False ):
    """Define name for referencing matching tokens as a nested attribute
       of the returned parse results.
       NOTE: this returns a *copy* of the original ParserElement object;
       this is so that the client can define a basic element, such as an
       integer, and reference it in multiple places with different names.
       When listAllMatches is true the copy is marked as non-modal.
    """
    named = self.copy()
    named.modalResults = not listAllMatches
    named.resultsName = name
    return named
728
def setBreak(self,breakFlag = True):
    """Method to invoke the Python pdb debugger when this element is
       about to be parsed. Set breakFlag to True to enable, False to
       disable.  Returns self.
    """
    if not breakFlag:
        # undo a previous setBreak() by restoring the wrapped parse method
        if hasattr(self._parse,"_originalParseMethod"):
            self._parse = self._parse._originalParseMethod
        return self

    _parseMethod = self._parse
    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        pdb.set_trace()
        return _parseMethod( instring, loc, doActions, callPreParse )
    # remember the wrapped method so that setBreak(False) can restore it
    breaker._originalParseMethod = _parseMethod
    self._parse = breaker
    return self
746
def _normalizeParseActionArgs( f ):
    """Internal method used to decorate parse actions that take fewer than 3 arguments,
       so that all parse actions can be called as f(s,l,t).

       The argument count of f is read from its code object; f is returned
       unchanged when it accepts *args or exactly 3 arguments, otherwise a
       wrapper tmp(s,l,t) that forwards the trailing arguments is returned."""
    # bit tested against co_flags; 4 is CO_VARARGS (function accepts *args)
    STAR_ARGS = 4

    try:
        restore = None
        if isinstance(f,type):
            # a class was passed: inspect its __init__, then put the class back
            restore = f
            f = f.__init__
        if not _PY3K:
            codeObj = f.func_code
        else:
            # NOTE(review): Python 3 functions expose __code__, not code, so
            # this lookup looks like it always raises AttributeError and
            # drops into the handler below -- confirm.
            codeObj = f.code
        if codeObj.co_flags & STAR_ARGS:
            return f
        numargs = codeObj.co_argcount
        # do not count the implicit self of a method
        if not _PY3K:
            if hasattr(f,"im_self"):
                numargs -= 1
        else:
            if hasattr(f,"__self__"):
                numargs -= 1
        if restore:
            f = restore
    except AttributeError:
        try:
            if not _PY3K:
                call_im_func_code = f.__call__.im_func.func_code
            else:
                call_im_func_code = f.__code__

            # not a function, must be a callable object, get info from the
            # im_func binding of its bound __call__ method
            if call_im_func_code.co_flags & STAR_ARGS:
                return f
            numargs = call_im_func_code.co_argcount
            if not _PY3K:
                if hasattr(f.__call__,"im_self"):
                    numargs -= 1
            else:
                # NOTE(review): subtracting 0 leaves numargs unchanged on
                # this path; intent unclear from here -- confirm.
                if hasattr(f.__call__,"__self__"):
                    numargs -= 0
        except AttributeError:
            if not _PY3K:
                call_func_code = f.__call__.func_code
            else:
                call_func_code = f.__call__.__code__
            # not a bound method, get info directly from __call__ method
            if call_func_code.co_flags & STAR_ARGS:
                return f
            numargs = call_func_code.co_argcount
            if not _PY3K:
                if hasattr(f.__call__,"im_self"):
                    numargs -= 1
            else:
                if hasattr(f.__call__,"__self__"):
                    numargs -= 1


    #~ print ("adding function %s with %d args" % (f.func_name,numargs))
    if numargs == 3:
        return f
    else:
        if numargs > 3:
            def tmp(s,l,t):
                return f(f.__call__.__self__, s,l,t)
        # NOTE(review): this is `if`, not `elif`, so when numargs > 3 the
        # final else below rebinds tmp to the zero-argument wrapper.
        if numargs == 2:
            def tmp(s,l,t):
                return f(l,t)
        elif numargs == 1:
            def tmp(s,l,t):
                return f(t)
        else: #~ numargs == 0:
            def tmp(s,l,t):
                return f()
        # copy identifying metadata from f onto the wrapper, where possible
        try:
            tmp.__name__ = f.__name__
        except (AttributeError,TypeError):
            # no need for special handling if attribute doesnt exist
            pass
        try:
            tmp.__doc__ = f.__doc__
        except (AttributeError,TypeError):
            # no need for special handling if attribute doesnt exist
            pass
        try:
            tmp.__dict__.update(f.__dict__)
        except (AttributeError,TypeError):
            # no need for special handling if attribute doesnt exist
            pass
        return tmp
_normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)
840
def setParseAction( self, *fns, **kwargs ):
    """Define action to perform when successfully matching parse element definition.
       Any previously defined parse actions are replaced.
       Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks),
       fn(loc,toks), fn(toks), or just fn(), where:
        - s   = the original string being parsed (see note below)
        - loc = the location of the matching substring
        - toks = a list of the matched tokens, packaged as a ParseResults object
       If the functions in fns modify the tokens, they can return them as the return
       value from fn, and the modified list of tokens will replace the original.
       Otherwise, fn does not need to return any value.

       The optional keyword argument callDuringTry is stored on the element.

       Note: the default parsing behavior is to expand tabs in the input string
       before starting the parsing process.  See L{I{parseString}<parseString>} for more information
       on parsing strings containing <TAB>s, and suggested methods to maintain a
       consistent view of the parsed string, the parse location, and line and column
       positions within the parsed string.
       """
    normalize = self._normalizeParseActionArgs
    self.parseAction = [ normalize(fn) for fn in fns ]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
861
def addParseAction( self, *fns, **kwargs ):
    """Append fns to this expression's existing parse actions. See L{I{setParseAction}<setParseAction>}."""
    normalize = self._normalizeParseActionArgs
    self.parseAction.extend( [ normalize(fn) for fn in fns ] )
    # once enabled, callDuringTry stays enabled
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
867
def setFailAction( self, fn ):
    """Define action to perform if parsing fails at this expression.
       Fail action fn is a callable function that takes the arguments
       fn(s,loc,expr,err) where:
        - s = string being parsed
        - loc = location where expression match was attempted and failed
        - expr = the parse expression that failed
        - err = the exception thrown
       The function returns no value.  It may throw ParseFatalException
       if it is desired to stop parsing immediately.

       Returns self."""
    setattr(self, "failAction", fn)
    return self
880
def _skipIgnorables( self, instring, loc ):
    """Advance loc past every leading match of the expressions in
       self.ignoreExprs and return the new location."""
    while True:
        progressed = False
        for ignorable in self.ignoreExprs:
            try:
                # consume as many consecutive matches as this expression allows
                while True:
                    loc, _ = ignorable._parse( instring, loc )
                    progressed = True
            except ParseException:
                pass
        # repeat the whole pass until no expression matched anything
        if not progressed:
            return loc
893
def preParse( self, instring, loc ):
    """Return loc advanced past any ignorable expressions and, when
       skipWhitespace is set, past any characters found in whiteChars."""
    if self.ignoreExprs:
        loc = self._skipIgnorables( instring, loc )

    if not self.skipWhitespace:
        return loc

    whitespace = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in whitespace:
        loc += 1
    return loc
905
def parseImpl( self, instring, loc, doActions=True ):
    """Base implementation: consume nothing and return (loc, empty token list)."""
    no_tokens = []
    return loc, no_tokens
908
def postParse( self, instring, loc, tokenlist ):
    """Base implementation: return tokenlist as received."""
    result = tokenlist
    return result
911
912 #~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Attempt to match this expression at loc, without using the packrat cache.

    Steps, in order: preParse (only when both callPreParse and
    self.callPreparse are true), parseImpl, postParse, wrapping of the
    tokens in a ParseResults, and then - if doActions or self.callDuringTry
    is set - each parse action in turn.  A parse action that returns a
    value other than None replaces the current results.

    When self.debug is set, debugActions[0] is called before matching,
    debugActions[1] after a successful match and debugActions[2] when a
    ParseBaseException is raised.  self.failAction, if set, is called when
    parseImpl fails.

    Returns a (loc, ParseResults) tuple, loc being the location returned
    by parseImpl.  ParseBaseException errors propagate to the caller.
    """
    debugging = ( self.debug ) #and doActions )

    if debugging or self.failAction:
        # slow path: debug hooks and/or a fail action have to be invoked
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        # note: this is loc as passed in, i.e. before preParse skipping
        tokensStart = loc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                # running off the end of the input is reported as a parse failure
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException, err:
            #~ print ("Exception raised:", err)
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        # fast path: no hooks to call
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = loc
        # only guard against IndexError where one can actually occur
        if self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            # same loop as the non-debugging branch below, plus the exception debug hook
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException, err:
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
984
def tryParse(self, instring, loc):
    """Test whether this expression matches at loc, with doActions=False.

    Returns the location following the match.  A ParseFatalException
    raised during the attempt is converted into an ordinary ParseException
    located at loc.
    """
    try:
        result = self._parse(instring, loc, doActions=False)
    except ParseFatalException:
        raise ParseException(instring, loc, self.errmsg, self)
    return result[0]
990
991 # this method gets repeatedly called during backtracking with the same arguments -
992 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
993 def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
994 lookup = (self,instring,loc,callPreParse,doActions)
995 if lookup in ParserElement._exprArgCache:
996 value = ParserElement._exprArgCache[ lookup ]
997 if isinstance(value,Exception):
998 raise value
999 return value
1000 else:
1001 try:
1002 value = self._parseNoCache( instring, loc, doActions, callPreParse )
1003 ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy())
1004 return value
1005 except ParseBaseException, pe:
1006 ParserElement._exprArgCache[ lookup ] = pe
1007 raise
1008
1009 _parse = _parseNoCache
1010
# argument cache for optimizing repeated calls when backtracking through recursive expressions
# (shared by all instances; keyed and filled by _parseCache)
_exprArgCache = {}
def resetCache():
    """Empty the shared _exprArgCache in place."""
    ParserElement._exprArgCache.clear()
resetCache = staticmethod(resetCache)
1016
_packratEnabled = False
def enablePackrat():
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.

    Repeated parse attempts at the same string location (which happens
    often in many complex grammars) can immediately return a cached value,
    instead of re-executing parsing/validating code.  Both valid results
    and parsing exceptions are memoized.

    This speedup may break existing programs that use parse actions that
    have side-effects.  For this reason, packrat parsing is disabled when
    you first import pyparsing.  To activate the packrat feature, your
    program must call the static method ParserElement.enablePackrat().
    Calling it again once packrat parsing is enabled has no effect.

    If your program uses psyco to "compile as you go", you must call
    enablePackrat before calling psyco.full().  If you do not do this,
    Python will crash.  For best results, call enablePackrat() immediately
    after importing pyparsing.
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    # from now on every _parse call goes through the memoizing wrapper
    ParserElement._parse = ParserElement._parseCache
enablePackrat = staticmethod(enablePackrat)
1038
1039 def parseString( self, instring, parseAll=False ):
1040 """Execute the parse expression with the given string.
1041 This is the main interface to the client code, once the complete
1042 expression has been built.
1043
1044 If you want the grammar to require that the entire input string be
1045 successfully parsed, then set parseAll to True (equivalent to ending
1046 the grammar with StringEnd()).
1047
1048 Note: parseString implicitly calls expandtabs() on the input string,
1049 in order to report proper column numbers in parse actions.
1050 If the input string contains tabs and
1051 the grammar uses parse actions that use the loc argument to index into the
1052 string being parsed, you can ensure you have a consistent view of the input
1053 string by:
1054 - calling parseWithTabs on your grammar before calling parseString
1055 (see L{I{parseWithTabs}<parseWithTabs>})
1056 - define your parse action using the full (s,loc,toks) signature, and
1057 reference the input string using the parse action's s argument
1058 - explictly expand the tabs in your input string before calling
1059 parseString
1060 """
1061 ParserElement.resetCache()
1062 if not self.streamlined:
1063 self.streamline()
1064 #~ self.saveAsList = True
1065 for e in self.ignoreExprs:
1066 e.streamline()
1067 if not self.keepTabs:
1068 instring = instring.expandtabs()
1069 try:
1070 loc, tokens = self._parse( instring, 0 )
1071 if parseAll:
1072 loc = self.preParse( instring, loc )
1073 StringEnd()._parse( instring, loc )
1074 except ParseBaseException, exc:
1075 # catch and re-raise exception from here, clears out pyparsing internal stack trace
1076 raise exc
1077 else:
1078 return tokens
1079
1080 def scanString( self, instring, maxMatches=_MAX_INT ):
1081 """Scan the input string for expression matches. Each match will return the
1082 matching tokens, start location, and end location. May be called with optional
1083 maxMatches argument, to clip scanning after 'n' matches are found.
1084
1085 Note that the start and end locations are reported relative to the string
1086 being parsed. See L{I{parseString}<parseString>} for more information on parsing
1087 strings with embedded tabs."""
1088 if not self.streamlined:
1089 self.streamline()
1090 for e in self.ignoreExprs:
1091 e.streamline()
1092
1093 if not self.keepTabs:
1094 instring = _ustr(instring).expandtabs()
1095 instrlen = len(instring)
1096 loc = 0
1097 preparseFn = self.preParse
1098 parseFn = self._parse
1099 ParserElement.resetCache()
1100 matches = 0
1101 try:
1102 while loc <= instrlen and matches < maxMatches:
1103 try:
1104 preloc = preparseFn( instring, loc )
1105 nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
1106 except ParseException:
1107 loc = preloc+1
1108 else:
1109 matches += 1
1110 yield tokens, preloc, nextLoc
1111 loc = nextLoc
1112 except ParseBaseException, pe:
1113 raise pe
1114
1115 def transformString( self, instring ):
1116 """Extension to scanString, to modify matching text with modified tokens that may
1117 be returned from a parse action. To use transformString, define a grammar and
1118 attach a parse action to it that modifies the returned token list.
1119 Invoking transformString() on a target string will then scan for matches,
1120 and replace the matched text patterns according to the logic in the parse
1121 action. transformString() returns the resulting transformed string."""
1122 out = []
1123 lastE = 0
1124 # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
1125 # keep string locs straight between transformString and scanString
1126 self.keepTabs = True
1127 try:
1128 for t,s,e in self.scanString( instring ):
1129 out.append( instring[lastE:s] )
1130 if t:
1131 if isinstance(t,ParseResults):
1132 out += t.asList()
1133 elif isinstance(t,list):
1134 out += t
1135 else:
1136 out.append(t)
1137 lastE = e
1138 out.append(instring[lastE:])
1139 return "".join(map(_ustr,out))
1140 except ParseBaseException, pe:
1141 raise pe
1142
1143 def searchString( self, instring, maxMatches=_MAX_INT ):
1144 """Another extension to scanString, simplifying the access to the tokens found
1145 to match the given parse expression. May be called with optional
1146 maxMatches argument, to clip searching after 'n' matches are found.
1147 """
1148 try:
1149 return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
1150 except ParseBaseException, pe:
1151 raise pe
1152
def __add__(self, other):
    """Implementation of + operator - returns And.

    A string operand is converted to a Literal first.  For any other
    operand that is not a ParserElement, a SyntaxWarning is issued and
    None is returned.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return And([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1162
def __radd__(self, other):
    """Implementation of + operator when left operand is not a ParserElement.

    A string operand is converted to a Literal first.  For any other
    operand that is not a ParserElement, a SyntaxWarning is issued and
    None is returned.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1172
def __sub__(self, other):
    """Implementation of - operator, returns And with error stop.

    The result is an And of self, an And._ErrorStop() marker and other.
    A string operand is converted to a Literal first.  For any other
    operand that is not a ParserElement, a SyntaxWarning is issued and
    None is returned.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return And([self, And._ErrorStop(), other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1182
def __rsub__(self, other):
    """Implementation of - operator when left operand is not a ParserElement.

    A string operand is converted to a Literal first.  For any other
    operand that is not a ParserElement, a SyntaxWarning is issued and
    None is returned.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1192
def __mul__(self,other):
    """Implementation of * operator - repeats this expression.

    other may be:
     - an int n: exactly n repetitions of this expression
     - a tuple (n, None) or (n,): n or more repetitions
     - a tuple (None, m): same as (0, m)
     - a tuple (n, m): at least n and at most m repetitions

    Raises TypeError if other is neither an int nor a tuple of
    ints/None, and ValueError if the minimum is negative, if the maximum
    is smaller than the minimum, or if the result would allow no
    repetition at all (0 or (0,0)).
    """
    if isinstance(other,int):
        minElements, optElements = other,0
    elif isinstance(other,tuple):
        # pad short tuples with None, ignore anything beyond two items
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0],int) and other[1] is None:
            # open-ended repetition
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0],int) and isinstance(other[1],int):
            minElements, optElements = other
            # from here on optElements counts the optional repetitions only
            optElements -= minElements
        else:
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]),type(other[1])))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % (type(other),))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # nested Optionals: Optional(self + Optional(self + ...)), n levels deep
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
1241
def __rmul__(self, other):
    """Implementation of * operator when left operand is not a ParserElement; delegates to __mul__."""
    return self.__mul__(other)
1244
def __or__(self, other):
    """Implementation of | operator - returns MatchFirst.

    A string operand is converted to a Literal first.  For any other
    operand that is not a ParserElement, a SyntaxWarning is issued and
    None is returned.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1254
def __ror__(self, other):
    """Implementation of | operator when left operand is not a ParserElement.

    A string operand is converted to a Literal first.  For any other
    operand that is not a ParserElement, a SyntaxWarning is issued and
    None is returned.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1264
def __xor__(self, other):
    """Implementation of ^ operator - returns Or.

    A string operand is converted to a Literal first.  For any other
    operand that is not a ParserElement, a SyntaxWarning is issued and
    None is returned.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return Or([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1274
def __rxor__(self, other):
    """Implementation of ^ operator when left operand is not a ParserElement.

    A string operand is converted to a Literal first.  For any other
    operand that is not a ParserElement, a SyntaxWarning is issued and
    None is returned.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1284
def __and__(self, other):
    """Implementation of & operator - returns Each.

    A string operand is converted to a Literal first.  For any other
    operand that is not a ParserElement, a SyntaxWarning is issued and
    None is returned.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return Each([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1294
def __rand__(self, other):
    """Implementation of & operator when left operand is not a ParserElement.

    A string operand is converted to a Literal first.  For any other
    operand that is not a ParserElement, a SyntaxWarning is issued and
    None is returned.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if isinstance(other, ParserElement):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1304
def __invert__( self ):
    """Implementation of ~ operator - returns NotAny wrapping this expression."""
    return NotAny( self )
1308
def __call__(self, name):
    """Shortcut for setResultsName, with listAllMatches=default::
         userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
       could be written as::
         userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")

       Returns whatever setResultsName(name) returns.
    """
    return self.setResultsName(name)
1316
def suppress( self ):
    """Suppresses the output of this ParserElement; useful to keep punctuation from
       cluttering up returned output.

       Returns a new Suppress wrapping this expression.
    """
    return Suppress( self )
1322
def leaveWhitespace( self ):
    """Disables the skipping of whitespace before matching the characters in the
       ParserElement's defined pattern.  This is normally only used internally by
       the pyparsing module, but may be needed in some whitespace-sensitive grammars.

       Returns self, so that calls can be chained.
    """
    self.skipWhitespace = False
    return self
1330
def setWhitespaceChars( self, chars ):
    """Overrides the default whitespace chars for this expression.

       Whitespace skipping is switched on, chars becomes the set of
       characters skipped by preParse, and copyDefaultWhiteChars is
       cleared.  Returns self, so that calls can be chained.
    """
    self.skipWhitespace = True
    self.whiteChars = chars
    self.copyDefaultWhiteChars = False
    return self
1338
def parseWithTabs( self ):
    """Overrides the default behavior of expanding <TAB>s to spaces before parsing
       the input string: once this has been called, <TAB>s are kept as they are.
       Must be called before parseString when the input grammar contains elements
       that match <TAB> characters.

       Returns self, so that calls can be chained."""
    self.keepTabs = True
    return self
1345
def ignore(self, other):
    """Define expression to be ignored (e.g., comments) while doing pattern
       matching; may be called repeatedly, to define multiple comment or other
       ignorable patterns.

       An expression that is not already a Suppress is wrapped in one.  A
       Suppress that is already registered is not added a second time.
       Returns self, so that calls can be chained.
    """
    if not isinstance(other, Suppress):
        self.ignoreExprs.append(Suppress(other))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self
1357
def setDebugActions(self, startAction, successAction, exceptionAction):
    """Enable display of debugging messages while doing pattern matching.

       The three callables are stored, in this order, as the start, success
       and exception debug actions.  Any of them that is false (e.g. None)
       is replaced by the corresponding module default.  Debugging is
       switched on.  Returns self, so that calls can be chained.
    """
    if not startAction:
        startAction = _defaultStartDebugAction
    if not successAction:
        successAction = _defaultSuccessDebugAction
    if not exceptionAction:
        exceptionAction = _defaultExceptionDebugAction
    self.debugActions = (startAction, successAction, exceptionAction)
    self.debug = True
    return self
1365
def setDebug(self, flag=True):
    """Enable display of debugging messages while doing pattern matching.
       Set flag to True to enable, False to disable.

       Enabling installs the default debug actions through setDebugActions.
       Returns self, so that calls can be chained.
    """
    if not flag:
        self.debug = False
        return self
    self.setDebugActions(_defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction)
    return self
1374
def __str__( self ):
    """Return this expression's name attribute."""
    return self.name
1377
def __repr__( self ):
    """Return the result of _ustr(self)."""
    return _ustr(self)
1380
def streamline( self ):
    """Mark this expression as streamlined and discard its cached string
       representation (strRepr).  Returns self."""
    self.streamlined = True
    self.strRepr = None
    return self
1385
def checkRecursion(self, parseElementList):
    """Recursion check hook; this default implementation does nothing."""
1388
def validate( self, validateTrace=[] ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

       This implementation only calls checkRecursion with an empty list;
       validateTrace is accepted but not read here.
    """
    self.checkRecursion( [] )
1392
def parseFile(self, file_or_filename, parseAll=False):
    """Execute the parse expression on the given file or filename.

       If file_or_filename has a read() method, its contents are read and
       the object is left open.  Otherwise it is taken to be a filename:
       the file is opened in binary mode, read completely, and closed
       before parsing (also when reading fails).

       The contents and parseAll are handed to parseString, whose result is
       returned.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        f = open(file_or_filename, "rb")
        try:
            file_contents = f.read()
        finally:
            # do not leak the handle if read() raises
            f.close()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException:
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        import sys
        exc = sys.exc_info()[1]
        raise exc
1409
def getException(self):
    """Build a ParseException for this expression, with an empty input
       string, location 0 and this expression's errmsg."""
    return ParseException("",0,self.errmsg,self)
1412
1413 def __getattr__(self,aname):
1414 if aname == "myException":
1415 self.myException = ret = self.getException();
1416 return ret;
1417 else:
1418 raise AttributeError("no such attribute " + aname)
1419
def __eq__(self, other):
    """Compare this expression with another expression or with a string.

       Two ParserElements are equal if they are the same object or if their
       instance dictionaries are equal.  A string is equal to the expression
       if the expression parses it completely (parseAll=True).  Any other
       operand is compared through super().
    """
    if isinstance(other, ParserElement):
        return self is other or self.__dict__ == other.__dict__
    if not isinstance(other, basestring):
        return super(ParserElement, self) == other
    try:
        self.parseString(_ustr(other), parseAll=True)
    except ParseBaseException:
        return False
    return True
1431
def __ne__(self,other):
    """Implementation of != operator - the negation of ==."""
    return not (self == other)
1434
def __hash__(self):
    """Hash on object identity."""
    # NOTE(review): __eq__ also treats elements with equal __dict__ as equal,
    # so two equal elements can hash differently - confirm this is intended
    return hash(id(self))
1437
def __req__(self,other):
    """Reflected form of ==; gives the same result as self == other."""
    return self == other
1440
def __rne__(self,other):
    """Reflected form of !=; gives the same result as not (self == other)."""
    return not (self == other)
1443
1444
class Token(ParserElement):
    """Abstract ParserElement subclass, for defining atomic matching patterns."""

    def __init__(self):
        """Initialize the base class with savelist=False."""
        super(Token, self).__init__(savelist=False)

    def setName(self, name):
        """Set the name through the base class and rebuild errmsg from it.

           Returns whatever the base class setName returns.
        """
        named = super(Token, self).setName(name)
        self.errmsg = "Expected " + self.name
        return named
1456
1457
class Empty(Token):
    """An empty token, will always match."""
    def __init__( self ):
        """Set the name to "Empty" and flag the token as possibly returning empty."""
        super(Empty,self).__init__()
        self.name = "Empty"
        self.mayReturnEmpty = True
        self.mayIndexError = False
1465
1466
class NoMatch(Token):
    """A token that will never match."""

    def __init__(self):
        """Set the name and the fixed error message of this token."""
        super(NoMatch, self).__init__()
        self.name = "NoMatch"
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.errmsg = "Unmatchable token"

    def parseImpl(self, instring, loc, doActions=True):
        """Always fail: raise this token's exception, positioned at loc."""
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1482
1483
class Literal(Token):
    """Token to exactly match a specified string."""

    def __init__(self, matchString):
        """Remember matchString, its length and its first character.

           An empty matchString triggers a SyntaxWarning and turns the
           instance into an Empty.
        """
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    def parseImpl(self, instring, loc, doActions=True):
        """Match the literal at loc.

           Returns (location after the literal, the literal string); raises
           this token's exception, positioned at loc, if it does not match.
        """
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith(self.match, loc):
                return loc + self.matchLen, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
_L = Literal
1516
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with Literal::
         Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
         Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
       Accepts two optional constructor arguments in addition to the keyword string:
       identChars is a string of characters that would be valid identifier characters,
       defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive
       matching, default is False.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__(self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False):
        """Remember the keyword and the set of identifier characters.

           For caseless matching both the keyword and identChars are
           upper-cased here, and the input is upper-cased in parseImpl.
           An empty matchString triggers a SyntaxWarning.
        """
        super(Keyword, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl(self, instring, loc, doActions=True):
        """Match the keyword at loc.

           The keyword text must be present at loc, and neither the
           character after it nor the character before it (if any) may be
           an identifier character.  Returns (location after the keyword,
           the keyword string); raises this token's exception otherwise.
        """
        end = loc + self.matchLen
        idChars = self.identChars
        if self.caseless:
            matched = ((instring[loc:end].upper() == self.caselessmatch) and
                       (loc >= len(instring)-self.matchLen or instring[end].upper() not in idChars) and
                       (loc == 0 or instring[loc-1].upper() not in idChars))
        else:
            matched = (instring[loc] == self.firstMatchChar and
                       (self.matchLen == 1 or instring.startswith(self.match, loc)) and
                       (loc >= len(instring)-self.matchLen or instring[end] not in idChars) and
                       (loc == 0 or instring[loc-1] not in idChars))
        if matched:
            return end, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def copy(self):
        """Copy through the base class, then set the copy's identChars to
           Keyword.DEFAULT_KEYWORD_CHARS."""
        duplicate = super(Keyword, self).copy()
        duplicate.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return duplicate

    def setDefaultKeywordChars(chars):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
1577
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """

    def __init__(self, matchString):
        """Store the upper-cased string for matching and the original for results."""
        super(CaselessLiteral, self).__init__(matchString.upper())
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        """Match the literal at loc, comparing upper-cased text.

           Returns (location after the literal, the string as given to the
           constructor); raises this token's exception otherwise.
        """
        end = loc + self.matchLen
        if instring[loc:end].upper() == self.match:
            return end, self.returnString
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1599
class CaselessKeyword(Keyword):
    """Keyword that is always matched without regard to case."""

    def __init__(self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS):
        """Initialize the Keyword base class with caseless=True."""
        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)

    def parseImpl(self, instring, loc, doActions=True):
        """Match the keyword at loc, comparing upper-cased text.

           The character after the keyword (if any) must not be an
           identifier character.  Returns (location after the keyword, the
           keyword string); raises this token's exception otherwise.
        """
        end = loc + self.matchLen
        if ((instring[loc:end].upper() == self.caselessmatch) and
                (loc >= len(instring)-self.matchLen or instring[end].upper() not in self.identChars)):
            return end, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1613
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.  The default value for min is 1 (a
       minimum value < 1 is not valid); the default values for max and exact
       are 0, meaning no maximum or exact length restriction.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):
        """Set up the character sets and length limits.

           When no length limit is given and no space is among the allowed
           characters, an equivalent regular expression is compiled and used
           by parseImpl as a fast path.

           Raises ValueError if min < 1.
        """
        super(Word,self).__init__()
        self.initCharsOrig = initChars
        self.initChars = _str2dict(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = _str2dict(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = _str2dict(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both min and max
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # without a compiled regex, parseImpl matches character by character
        self.re = None
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # a single initial character can be matched literally; with
                # more than one it has to be a character class (next branch)
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # pattern could not be compiled; fall back to the slow path
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a word at loc.

           Returns (location after the word, the matched text); raises this
           token's exception if no acceptable word starts at loc.
        """
        if self.re:
            # fast path: the whole word is described by the compiled regex
            result = self.re.match(instring,loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc,result.group()

        if not(instring[ loc ] in self.initChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            # too short
            throwException = True
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            # too long: body characters continue past the maximum length
            throwException = True
        if self.asKeyword:
            # a keyword must not be embedded in a longer run of body characters
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the name if one can be obtained from the base class,
           otherwise a generated "W:(...)" description, which is cached
           in strRepr."""
        try:
            return super(Word,self).__str__()
        except Exception:
            # no usable name; build a description instead
            pass

        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets to their first four characters
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
1740
1741
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    def __init__(self, pattern, flags=0):
        """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags.

           An empty pattern triggers a SyntaxWarning.  A pattern that does
           not compile triggers a SyntaxWarning and the compile error is
           re-raised.
        """
        super(Regex, self).__init__()

        if len(pattern) == 0:
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match the regular expression at loc.

           Returns (location after the match, ParseResults holding the
           matched text); every named group of the match is also stored in
           the results under its group name.  Raises this token's exception
           if the expression does not match at loc.
        """
        found = self.re.match(instring, loc)
        if not found:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        tokens = ParseResults(found.group())
        namedGroups = found.groupdict()
        for groupName in namedGroups:
            tokens[groupName] = namedGroups[groupName]
        return found.end(), tokens

    def __str__(self):
        """Return the name if one can be obtained from the base class,
           otherwise a generated "Re:(...)" description, which is cached
           in strRepr."""
        try:
            return super(Regex, self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
1797
1798
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters."""

    def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False,
                 unquoteResults=True, endQuoteChar=None):
        """
        Defined with the following parameters:
         - quoteChar - string of one or more characters defining the quote
           delimiting string
         - escChar - character to escape quotes, typically backslash
           (default=None)
         - escQuote - special quote sequence to escape an embedded quote
           string (such as SQL's "" to escape an embedded ") (default=None)
         - multiline - boolean indicating whether quotes can span multiple
           lines (default=False)
         - unquoteResults - boolean indicating whether the matched text
           should be unquoted (default=True)
         - endQuoteChar - string of one or more characters defining the end
           of the quote delimited string (default=None => same as quoteChar)

        A quoteChar or endQuoteChar that is empty after stripping white
        space produces a SyntaxWarning followed by SyntaxError.
        """
        super(QuotedString, self).__init__()

        # white space around the quote characters is stripped before use
        quoteChar = quoteChar.strip()
        if not len(quoteChar):
            warnings.warn("quoteChar cannot be the empty string",
                          SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is not None:
            endQuoteChar = endQuoteChar.strip()
            if not len(endQuoteChar):
                warnings.warn("endQuoteChar cannot be the empty string",
                              SyntaxWarning, stacklevel=2)
                raise SyntaxError()
        else:
            endQuoteChar = quoteChar

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # Pieces of the leading character class: the body may not contain
        # the first end-quote character, the escape character, or (in
        # single-line mode) a line break.
        openingRe = re.escape(quoteChar)
        endFirstRe = _escapeRegexRangeChars(endQuoteChar[0])
        escClassRe = ''
        if escChar is not None:
            escClassRe = _escapeRegexRangeChars(escChar) or ''

        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            template = r'%s(?:[^%s%s]'
        else:
            self.flags = 0
            template = r'%s(?:[^%s\n\r%s]'
        regex = template % (openingRe, endFirstRe, escClassRe)

        if len(endQuoteChar) > 1:
            # A proper prefix of a multi-character end quote followed by a
            # character that breaks the end quote is still body text.
            partialEnds = []
            for i in range(len(endQuoteChar) - 1, 0, -1):
                partialEnds.append(
                    "%s[^%s]" % (re.escape(endQuoteChar[:i]),
                                 _escapeRegexRangeChars(endQuoteChar[i])))
            regex += '|(?:' + ')|(?:'.join(partialEnds) + ')'
        if escQuote:
            regex += r'|(?:%s)' % re.escape(escQuote)
        if escChar:
            regex += r'|(?:%s.)' % re.escape(escChar)
            self.escCharReplacePattern = re.escape(escChar) + "(.)"
        regex += r')*%s' % re.escape(endQuoteChar)
        self.pattern = regex

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                          SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match a quoted string at loc.

        Returns (end position, matched text).  With unquoteResults set,
        the delimiters are removed, escChar-escaped characters are
        replaced by the bare character and each escQuote sequence is
        replaced by endQuoteChar.  On failure the element's shared
        exception object is updated and raised.
        """
        match = None
        # cheap first-character test before running the full expression
        if instring[loc] == self.firstQuoteChar:
            match = self.re.match(instring, loc)
        if not match:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        text = match.group()
        if self.unquoteResults:
            # strip off quotes
            text = text[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(text, basestring):
                # replace escaped characters
                if self.escChar:
                    text = re.sub(self.escCharReplacePattern, r"\g<1>", text)

                # replace escaped quotes
                if self.escQuote:
                    text = text.replace(self.escQuote, self.endQuoteChar)

        return match.end(), text

    def __str__(self):
        """Return the inherited string form, else a cached description
        naming the opening and closing quote strings."""
        try:
            return super(QuotedString, self).__str__()
        except:
            # Deliberately broad: any failure of the inherited __str__
            # selects the generated description below.
            pass

        description = self.strRepr
        if description is None:
            description = "quoted string, starting with %s ending with %s" % (
                self.quoteChar, self.endQuoteChar)
            self.strRepr = description
        return description
1913
1914
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.
       Defined with string containing all disallowed characters, and an
       optional minimum, maximum, and/or exact length.  The default value for
       min is 1 (a minimum value < 1 is not valid); the default values for
       max and exact are 0, meaning no maximum or exact length restriction.
    """
    def __init__(self, notChars, min=1, max=0, exact=0):
        """Store the disallowed characters and the length limits.

        Raises ValueError when min is below 1.  A positive exact value
        overrides both min and max.
        """
        super(CharsNotIn, self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        shortest, longest = min, _MAX_INT
        if max > 0:
            longest = max
        if exact > 0:
            shortest = longest = exact
        self.minLen = shortest
        self.maxLen = longest

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = (self.minLen == 0)
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a run of characters that are not in notChars.

        Returns (end position, matched text).  The shared exception
        object is raised when the first character is disallowed or when
        fewer than minLen characters were consumed; in the second case
        the reported location is where the run stopped.
        """
        forbidden = self.notChars
        if instring[loc] in forbidden:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        begin = loc
        # never read beyond the maximum length or the end of the input
        limit = min(begin + self.maxLen, len(instring))
        loc += 1
        while loc < limit and instring[loc] not in forbidden:
            loc += 1

        if loc - begin < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[begin:loc]

    def __str__(self):
        """Return the inherited string form, else a cached "!W:(...)" form
        showing at most the first four disallowed characters."""
        try:
            return super(CharsNotIn, self).__str__()
        except:
            # Deliberately broad: any failure of the inherited __str__
            # selects the generated form below.
            pass

        if self.strRepr is None:
            if len(self.notChars) > 4:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
            else:
                self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
1985
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace
       is ignored by pyparsing grammars.  This class is included when some
       whitespace structures are significant.  Define with a string
       containing the whitespace characters to be matched; default is
       " \\t\\r\\n".  Also takes optional min, max, and exact arguments, as
       defined for the Word class."""
    # display names used to build the element's name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }

    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        """Store the characters to match and the length limits.

        Characters listed in ws are removed from the set of whitespace
        this element skips.  Every character of ws must be a key of
        whiteStrs, since the name is built from that table.  A positive
        exact value overrides both min and max.
        """
        super(White, self).__init__()
        self.matchWhite = ws
        skippable = [c for c in self.whiteChars if c not in ws]
        self.setWhitespaceChars("".join(skippable))
        self.name = "".join([White.whiteStrs[c] for c in ws])
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        shortest, longest = min, _MAX_INT
        if max > 0:
            longest = max
        if exact > 0:
            shortest = longest = exact
        self.minLen = shortest
        self.maxLen = longest

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a run of the configured whitespace characters.

        Returns (end position, matched text).  The shared exception
        object is raised when the first character is not one of
        matchWhite or when fewer than minLen characters were consumed.
        """
        wanted = self.matchWhite
        if instring[loc] not in wanted:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        begin = loc
        # never read beyond the maximum length or the end of the input
        limit = min(begin + self.maxLen, len(instring))
        loc += 1
        while loc < limit and instring[loc] in wanted:
            loc += 1

        if loc - begin < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[begin:loc]
2042
2043
class _PositionToken(Token):
    """Common base of the position-testing tokens defined below it."""
    def __init__(self):
        super(_PositionToken, self).__init__()
        # the element is named after its concrete class
        self.name = self.__class__.__name__
        self.mayIndexError = False
        self.mayReturnEmpty = True
2050
class GoToColumn(_PositionToken):
    """Token to advance to a specific column of input text; useful for
       tabular report scraping."""
    def __init__(self, colno):
        super(GoToColumn, self).__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """Skip ignorable expressions and whitespace until the target
        column, a non-space character or the end of the input is reached."""
        if col(loc, instring) == self.col:
            return loc

        end = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        while (loc < end and instring[loc].isspace()
               and col(loc, instring) != self.col):
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return the text between loc and the target column.

        Raises ParseException when loc is already beyond that column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        target = loc + self.col - current
        return target, instring[loc:target]
2073
class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the
       parse string"""
    def __init__(self):
        super(LineStart, self).__init__()
        # newline must not be skipped as whitespace by this element
        self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", ""))
        self.errmsg = "Expected start of line"

    def preParse(self, instring, loc):
        """Return loc advanced by one when the inherited pre-parse stops
        on a newline, otherwise loc unchanged."""
        stopped = super(LineStart, self).preParse(instring, loc)
        if instring[stopped] == "\n":
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with an empty token list at position 0, at the position
        pre-parsing reaches from 0, or directly after a newline; otherwise
        raise the shared exception object."""
        if (loc != 0
                and loc != self.preParse(instring, 0)
                and instring[loc - 1] != "\n"):
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
2098
class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the parse
       string"""
    def __init__(self):
        super(LineEnd, self).__init__()
        # newline must not be skipped as whitespace by this element
        self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", ""))
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Match a newline at loc, or the end of the input.

        Returns (loc+1, "\\n") for a newline and (loc+1, []) exactly at
        the end of the input.  Any other character, or a position beyond
        the end, raises the shared exception object.
        """
        end = len(instring)
        if loc == end:
            return loc + 1, []
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
2124
class StringStart(_PositionToken):
    """Matches if current position is at the beginning of the parse string"""
    def __init__(self):
        super(StringStart, self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with an empty token list at position 0, or at the
        position that pre-parsing from 0 reaches (i.e. everything before
        loc is only whitespace and ignorables); otherwise raise the
        shared exception object."""
        if loc != 0 and loc != self.preParse(instring, 0):
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
2142
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__(self):
        super(StringEnd, self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed only when loc is at or beyond the end of instring.

        Returns (loc+1, []) exactly at the end and (loc, []) beyond it.
        Before the end, the element's shared exception object is updated
        with the position and input string and raised.
        """
        end = len(instring)
        if loc < end:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        if loc == end:
            return loc + 1, []
        # loc > end is the only remaining case; the former trailing
        # "else: raise" branch could never execute and was removed.
        return loc, []
2166
class WordStart(_PositionToken):
    """Matches if the current position is at the beginning of a Word, and
       is not preceded by any character in a given set of wordChars
       (default=printables).  To emulate the \\b behavior of regular
       expressions, use WordStart(alphanums).  WordStart will also match at
       the beginning of the string being parsed, or at the beginning of a
       line.
    """
    def __init__(self, wordChars=printables):
        super(WordStart, self).__init__()
        self.wordChars = _str2dict(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with an empty token list at position 0, or where the
        previous character is not a word character and the current one
        is; otherwise raise the shared exception object."""
        if loc == 0:
            return loc, []

        wordChars = self.wordChars
        if instring[loc - 1] in wordChars or instring[loc] not in wordChars:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
2188
class WordEnd(_PositionToken):
    """Matches if the current position is at the end of a Word, and
       is not followed by any character in a given set of wordChars
       (default=printables).  To emulate the \\b behavior of regular
       expressions, use WordEnd(alphanums).  WordEnd will also match at
       the end of the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars=printables):
        super(WordEnd, self).__init__()
        self.wordChars = _str2dict(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with an empty token list at the end of a word.

        At or beyond the end of instring the match always succeeds.
        Inside the string it succeeds only when the previous character is
        a word character and the current one is not; otherwise the
        element's shared exception object is updated and raised.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            wordChars = self.wordChars
            # Position 0 has no previous character, so it can never be
            # the end of a word.  Without the explicit test,
            # instring[loc-1] would be instring[-1] and the outcome would
            # depend on the LAST character of the input.
            if (instring[loc] in wordChars or
                    loc == 0 or
                    instring[loc - 1] not in wordChars):
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc
        return loc, []
2213
2214
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing
       parsed tokens."""
    def __init__(self, exprs, savelist=False):
        """Normalise exprs into the list self.exprs.

        A list is stored as given (not copied), a string becomes a
        one-element list holding a Literal, any other iterable is
        converted with list(), and a non-iterable object is wrapped in a
        one-element list.
        """
        super(ParseExpression, self).__init__(savelist)
        if isinstance(exprs, list):
            normalised = exprs
        elif isinstance(exprs, basestring):
            normalised = [Literal(exprs)]
        else:
            try:
                normalised = list(exprs)
            except TypeError:
                normalised = [exprs]
        self.exprs = normalised
        self.callPreparse = False

    def __getitem__(self, i):
        return self.exprs[i]

    def append(self, other):
        """Add other to the contained expressions, drop the cached string
        form and return self."""
        self.exprs.append(other)
        self.strRepr = None
        return self

    def leaveWhitespace(self):
        """Extends leaveWhitespace defined in base class, and also invokes
           leaveWhitespace on all contained expressions."""
        self.skipWhitespace = False
        # work on copies so the expressions passed in are left untouched
        copies = []
        for e in self.exprs:
            copies.append(e.copy())
        self.exprs = copies
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register other as ignorable here and on every contained
        expression.  A Suppress that is already registered is skipped."""
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super(ParseExpression, self).ignore(other)
        for e in self.exprs:
            # forward the entry that the base class just appended
            e.ignore(self.ignoreExprs[-1])
        return self

    def __str__(self):
        """Return the inherited string form, else a cached
        "ClassName:(exprs)" form."""
        try:
            return super(ParseExpression, self).__str__()
        except:
            # Deliberately broad: any failure of the inherited __str__
            # selects the generated form below.
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.exprs))
        return self.strRepr

    def streamline(self):
        """Streamline all contained expressions, then flatten a directly
        nested expression of the same class into this one."""
        super(ParseExpression, self).streamline()

        for e in self.exprs:
            e.streamline()

        def mergeable(candidate):
            # same class, and no parse action, results name or debug flag
            # that would be lost by flattening
            return (isinstance(candidate, self.__class__) and
                    not candidate.parseAction and
                    candidate.resultsName is None and
                    not candidate.debug)

        # collapse nested And's of the form And( And( And( a,b), c), d) to
        # And( a,b,c,d ) - likewise for Or's and MatchFirst's
        if len(self.exprs) == 2:
            first = self.exprs[0]
            if mergeable(first):
                self.exprs = first.exprs[:] + [self.exprs[1]]
                self.strRepr = None
                self.mayReturnEmpty |= first.mayReturnEmpty
                self.mayIndexError |= first.mayIndexError

            last = self.exprs[-1]
            if mergeable(last):
                self.exprs = self.exprs[:-1] + last.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= last.mayReturnEmpty
                self.mayIndexError |= last.mayIndexError

        return self

    def setResultsName(self, name, listAllMatches=False):
        return super(ParseExpression, self).setResultsName(name, listAllMatches)

    def validate(self, validateTrace=[]):
        """Validate every contained expression with this element appended
        to the trace, then run the recursion check from an empty list."""
        # the default list is only copied, never modified
        trace = validateTrace[:] + [self]
        for e in self.exprs:
            e.validate(trace)
        self.checkRecursion([])
2310
class And(ParseExpression):
    """Requires all given ParseExpressions to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the '+' operator.
    """

    class _ErrorStop(Empty):
        """Marker element: once parseImpl meets it, failures of the
        following expressions are raised as ParseSyntaxException."""
        def __init__(self, *args, **kwargs):
            # super(Empty, self) would start the lookup *after* Empty and
            # silently skip Empty.__init__; name this class instead so
            # the marker is initialised like any other Empty.
            super(And._ErrorStop, self).__init__(*args, **kwargs)
            self.leaveWhitespace()

    def __init__(self, exprs, savelist=True):
        """Combine exprs into a sequence.

        The sequence may return empty only when every member may, and it
        takes its whitespace settings from the first member.
        """
        super(And, self).__init__(exprs, savelist)
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        # Use the normalised self.exprs: the raw argument may be a string
        # or a non-indexable iterable, both accepted by the base class.
        first = self.exprs[0]
        self.setWhitespaceChars(first.whiteChars)
        self.skipWhitespace = first.skipWhitespace
        self.callPreparse = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse every contained expression in order.

        Returns (end position, accumulated results).  After an
        _ErrorStop marker, a ParseBaseException or IndexError from a
        later expression is re-raised as ParseSyntaxException.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False)
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException:
                    # sys.exc_info() instead of "except X, name" so the
                    # clause parses under both Python 2 and Python 3
                    import sys
                    raise ParseSyntaxException(sys.exc_info()[1])
                except IndexError:
                    raise ParseSyntaxException(
                        ParseException(instring, len(instring), self.errmsg, self))
            else:
                loc, exprtokens = e._parse(instring, loc, doActions)
            if exprtokens or exprtokens.keys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """Append other (a string is wrapped in a Literal) in place."""
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other)

    def checkRecursion(self, parseElementList):
        """Check the members for recursion, stopping after the first one
        that cannot return empty."""
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
            if not e.mayReturnEmpty:
                break

    def __str__(self):
        """Return the name when one is set, else a cached "{a b c}" form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join([_ustr(e) for e in self.exprs]) + "}"

        return self.strRepr
2377
2378
class Or(ParseExpression):
    """Requires that at least one ParseExpression is found.
       If two expressions match, the expression that matches the longest
       string will be used.
       May be constructed using the '^' operator.
    """
    def __init__(self, exprs, savelist=False):
        """Combine exprs as alternatives; the result may return empty when
        any alternative may."""
        super(Or, self).__init__(exprs, savelist)
        self.mayReturnEmpty = False
        for e in self.exprs:
            if e.mayReturnEmpty:
                self.mayReturnEmpty = True
                break

    def parseImpl(self, instring, loc, doActions=True):
        """Try every alternative and parse with the longest match.

        Each alternative is first tried with tryParse; the one reaching
        the furthest position (the earliest listed on a tie) is then
        parsed for real.  When none matches, the failure that got
        furthest is raised, or a ParseException saying that there are no
        alternatives.
        """
        maxExcLoc = -1
        maxMatchLoc = -1
        maxException = None
        maxMatchExp = None
        for e in self.exprs:
            try:
                loc2 = e.tryParse(instring, loc)
            except ParseException:
                # sys.exc_info() instead of "except X, name" so the
                # clause parses under both Python 2 and Python 3
                import sys
                err = sys.exc_info()[1]
                if err.loc > maxExcLoc:
                    maxException = err
                    maxExcLoc = err.loc
            except IndexError:
                if len(instring) > maxExcLoc:
                    maxException = ParseException(instring, len(instring), e.errmsg, self)
                    maxExcLoc = len(instring)
            else:
                if loc2 > maxMatchLoc:
                    maxMatchLoc = loc2
                    maxMatchExp = e

        if maxMatchLoc < 0:
            if maxException is not None:
                raise maxException
            else:
                raise ParseException(instring, loc, "no defined alternatives to match", self)

        return maxMatchExp._parse(instring, loc, doActions)

    def __ixor__(self, other):
        """Append other (a string is wrapped in a Literal) in place."""
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other)

    def __str__(self):
        """Return the name when one is set, else a cached "{a ^ b}" form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ^ ".join([_ustr(e) for e in self.exprs]) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Run the recursion check on every alternative."""
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
2438
2439
class MatchFirst(ParseExpression):
    """Requires that at least one ParseExpression is found.
       If two expressions match, the first one listed is the one that will
       match.
       May be constructed using the '|' operator.
    """
    def __init__(self, exprs, savelist=False):
        """Combine exprs as alternatives.

        The result may return empty when any alternative may, or when
        there are no alternatives at all.
        """
        super(MatchFirst, self).__init__(exprs, savelist)
        # Test the normalised list: the raw argument may be an iterator,
        # which is truthy even when it yields nothing.
        if self.exprs:
            self.mayReturnEmpty = False
            for e in self.exprs:
                if e.mayReturnEmpty:
                    self.mayReturnEmpty = True
                    break
        else:
            self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses.

        When none matches, the failure that got furthest is raised, or a
        ParseException saying that there are no alternatives.
        """
        maxExcLoc = -1
        maxException = None
        for e in self.exprs:
            try:
                return e._parse(instring, loc, doActions)
            except ParseException:
                # sys.exc_info() instead of "except X, name" so the
                # clause parses under both Python 2 and Python 3
                import sys
                err = sys.exc_info()[1]
                if err.loc > maxExcLoc:
                    maxException = err
                    maxExcLoc = err.loc
            except IndexError:
                if len(instring) > maxExcLoc:
                    maxException = ParseException(instring, len(instring), e.errmsg, self)
                    maxExcLoc = len(instring)

        # only got here if no expression matched, raise exception for match
        # that made it the furthest
        if maxException is not None:
            raise maxException
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other):
        """Append other (a string is wrapped in a Literal) in place."""
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other)

    def __str__(self):
        """Return the name when one is set, else a cached "{a | b}" form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " | ".join([_ustr(e) for e in self.exprs]) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Run the recursion check on every alternative."""
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
2497
2498
class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the '&' operator.
    """
    def __init__(self, exprs, savelist=True):
        """Combine exprs; the result may return empty only when every
        member may."""
        super(Each, self).__init__(exprs, savelist)
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        self.skipWhitespace = True
        self.initExprGroups = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match all members in whatever order the input presents them.

        A trial pass using tryParse establishes the matching order; the
        members are then parsed for real in that order and their results
        merged.  Raises ParseException when a required member never
        matched.
        """
        if self.initExprGroups:
            # one-time classification of the members, done on first parse
            optionals, multiOptionals, multiRequired, plain = [], [], [], []
            for member in self.exprs:
                if isinstance(member, Optional):
                    optionals.append(member.expr)
                if isinstance(member, ZeroOrMore):
                    multiOptionals.append(member.expr)
                if isinstance(member, OneOrMore):
                    multiRequired.append(member.expr)
                if not isinstance(member, (Optional, ZeroOrMore, OneOrMore)):
                    plain.append(member)
            self.optionals = optionals
            self.multioptionals = multiOptionals
            self.multirequired = multiRequired
            self.required = plain + multiRequired
            self.initExprGroups = False

        scanLoc = loc
        pendingReqd = self.required[:]
        pendingOpt = self.optionals[:]
        matchOrder = []

        # keep sweeping over the candidates until a sweep matches nothing
        while True:
            candidates = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
            failures = 0
            for cand in candidates:
                try:
                    scanLoc = cand.tryParse(instring, scanLoc)
                except ParseException:
                    failures += 1
                    continue
                matchOrder.append(cand)
                if cand in pendingReqd:
                    pendingReqd.remove(cand)
                elif cand in pendingOpt:
                    pendingOpt.remove(cand)
            if failures == len(candidates):
                break

        if pendingReqd:
            missing = ", ".join([_ustr(e) for e in pendingReqd])
            raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing)

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [opt for opt in self.exprs
                       if isinstance(opt, Optional) and opt.expr in pendingOpt]

        resultlist = []
        for matched in matchOrder:
            loc, results = matched._parse(instring, loc, doActions)
            resultlist.append(results)

        finalResults = ParseResults([])
        for partial in resultlist:
            # names already present get old and new values concatenated
            combined = {}
            for key in partial.keys():
                if key in finalResults.keys():
                    joined = ParseResults(finalResults[key])
                    joined += ParseResults(partial[key])
                    combined[key] = joined
            finalResults += ParseResults(partial)
            for key, value in combined.items():
                finalResults[key] = value
        return loc, finalResults

    def __str__(self):
        """Return the name when one is set, else a cached "{a & b}" form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " & ".join([_ustr(e) for e in self.exprs]) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Run the recursion check on every member."""
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
2583
2584
class ParseElementEnhance(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing
       parsed tokens."""
    def __init__(self, expr, savelist=False):
        """Wrap expr (a string is converted to a Literal; None is allowed).

        When expr is not None its whitespace, index-error, empty-return,
        list-saving and pre-parse settings are copied and its ignore
        expressions are added to this element's.
        """
        super(ParseElementEnhance, self).__init__(savelist)
        if isinstance(expr, basestring):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars(expr.whiteChars)
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate to the wrapped expression without a second pre-parse;
        raise ParseException when no expression is set."""
        if self.expr is not None:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        else:
            raise ParseException("", loc, self.errmsg, self)

    def leaveWhitespace(self):
        """Stop skipping whitespace here and in a copy of the wrapped
        expression; returns self."""
        self.skipWhitespace = False
        # Test for None before touching the expression: the check used to
        # come after self.expr.copy(), which raised AttributeError when
        # no expression was set.
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register other as ignorable here and on the wrapped expression.
        A Suppress that is already registered is skipped."""
        if isinstance(other, Suppress):
            if other not in self.ignoreExprs:
                super(ParseElementEnhance, self).ignore(other)
                if self.expr is not None:
                    self.expr.ignore(self.ignoreExprs[-1])
        else:
            super(ParseElementEnhance, self).ignore(other)
            if self.expr is not None:
                self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self):
        """Streamline this element and the wrapped expression."""
        super(ParseElementEnhance, self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion(self, parseElementList):
        """Raise RecursiveGrammarException when this element is already in
        parseElementList; otherwise continue into the wrapped expression."""
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        subRecCheckList = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr.checkRecursion(subRecCheckList)

    def validate(self, validateTrace=[]):
        """Validate the wrapped expression with this element appended to
        the trace, then run the recursion check from an empty list."""
        # the default list is only copied, never modified
        tmp = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        """Return the inherited string form, else a cached
        "ClassName:(expr)" form (None when no expression is set and
        nothing has been cached)."""
        try:
            return super(ParseElementEnhance, self).__str__()
        except:
            # Deliberately broad: any failure of the inherited __str__
            # selects the generated form below.
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr))
        return self.strRepr
2655
2656
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.  FollowedBy
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  FollowedBy always returns a null token list."""
    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Try the wrapped expression at loc and return (loc, []); any
        exception raised by the trial parse propagates to the caller."""
        self.expr.tryParse(instring, loc)
        unchanged = loc
        return unchanged, []
2669
2670
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.  NotAny
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression does *not* match at the
    current position.  Also, NotAny does *not* skip over leading whitespace.
    NotAny always returns a null token list.  May be constructed using the
    '~' operator."""
    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Return (loc, []) when the wrapped expression fails at loc;
        raise the shared exception object when it matches."""
        try:
            self.expr.tryParse(instring, loc)
        except (ParseException, IndexError):
            # the unwanted expression is absent, which is a success here
            return loc, []

        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def __str__(self):
        """Return the name when one is set, else a cached "~{expr}" form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            inner = _ustr(self.expr)
            self.strRepr = "~{" + inner + "}"

        return self.strRepr
2706
2707
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""
    def __init__(self, expr):
        super(ZeroOrMore, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the wrapped expression as often as it matches.

        Returns (end position, accumulated tokens); the tokens are an
        empty list when not even the first attempt matched.  A
        ParseException or IndexError ends the repetition silently.
        """
        tokens = []
        try:
            # the first attempt was already pre-parsed by this element
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
            skipIgnorables = len(self.ignoreExprs) > 0
            while True:
                nextLoc = loc
                if skipIgnorables:
                    nextLoc = self._skipIgnorables(instring, loc)
                loc, more = self.expr._parse(instring, nextLoc, doActions)
                if more or more.keys():
                    tokens += more
        except (ParseException, IndexError):
            pass

        return loc, tokens

    def __str__(self):
        """Return the name when one is set, else a cached "[expr]..." form."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            inner = _ustr(self.expr)
            self.strRepr = "[" + inner + "]..."

        return self.strRepr

    def setResultsName(self, name, listAllMatches=False):
        """Set the results name as the base class does and mark the
        returned element to save its tokens as a list."""
        named = super(ZeroOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
2745
2746
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""

    def parseImpl( self, instring, loc, doActions=True ):
        """Match self.expr at least once, then as many more times as possible.

        Returns (loc, tokens).  A failure of the first match propagates to
        the caller; a ParseException or IndexError on any later attempt
        just ends the repetition.
        """
        # the first match is mandatory, so it sits outside the try block
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        try:
            skipIgnored = len(self.ignoreExprs) > 0
            while True:
                nextLoc = loc
                if skipIgnored:
                    nextLoc = self._skipIgnorables( instring, loc )
                loc, moreTokens = self.expr._parse( instring, nextLoc, doActions )
                # keep results that carry either list items or named keys
                if moreTokens or moreTokens.keys():
                    tokens += moreTokens
        except (ParseException,IndexError):
            pass

        return loc, tokens

    def __str__( self ):
        """Return the assigned name if there is one, else a cached "{expr}..." form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{%s}..." % _ustr(self.expr)
        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """Same as the inherited setResultsName, but the returned element saves its results as a list."""
        named = super(OneOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
2780
2781 class _NullToken(object):
2782 def __bool__(self):
2783 return False
2784 __nonzero__ = __bool__
2785 def __str__(self):
2786 return ""
2787
2788 _optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
       If the optional expression is not found, a default value can be
       returned in its place by passing it as the 'default' argument.
    """

    def __init__( self, exprs, default=_optionalNotMatched ):
        super(Optional,self).__init__( exprs, savelist=False )
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Try to match self.expr at loc; on failure fall back to the default.

        Returns (loc, tokens).  When the match fails, loc is returned
        unchanged and tokens is either empty (no default configured) or
        holds the default value - also stored under the wrapped
        expression's results name if it has one.
        """
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException,IndexError):
            fallback = self.defaultValue
            if fallback is _optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                tokens = ParseResults([ fallback ])
                tokens[self.expr.resultsName] = fallback
            else:
                tokens = [ fallback ]
        return loc, tokens

    def __str__( self ):
        """Return the assigned name if there is one, else a cached "[expr]" form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[%s]" % _ustr(self.expr)
        return self.strRepr
2821
2822
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If include is set to true, the matched expression is also parsed (the skipped text
       and matched expression are returned as a 2-element list).  The ignore
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.  If failOn is given (a string is converted to a
       Literal), finding that expression before the target aborts the skip with a
       ParseException.
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        if failOn is not None and isinstance(failOn, basestring):
            self.failOn = Literal(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)
        #self.myException = ParseException("",0,self.errmsg,self)

    def parseImpl( self, instring, loc, doActions=True ):
        """Advance one character at a time from loc until self.expr matches.

        Returns (loc, [skippedText]), or (loc, [ParseResults]) holding the
        skipped text followed by the match tokens when includeMatch is set
        and the match produced tokens.  Raises the ParseException built for
        failOn if that expression is found first, or self.myException if
        the end of the string is reached without a match.
        """
        startLoc = loc
        instrlen = len(instring)
        expr = self.expr
        # set only while the failOn exception is propagating, so the outer
        # handler re-raises it instead of treating it as "no match here"
        failParse = False
        while loc <= instrlen:
            try:
                if self.failOn:
                    try:
                        self.failOn.tryParse(instring, loc)
                    except ParseBaseException:
                        pass
                    else:
                        failParse = True
                        raise ParseException(instring, loc, "Found expression " + str(self.failOn))
                    failParse = False
                if self.ignoreExpr is not None:
                    # hop over every consecutive ignorable match at this position
                    while 1:
                        try:
                            loc = self.ignoreExpr.tryParse(instring,loc)
                        except ParseBaseException:
                            break
                # probe for the target without running parse actions
                expr._parse( instring, loc, doActions=False, callPreParse=False )
                skipText = instring[startLoc:loc]
                if self.includeMatch:
                    loc,mat = expr._parse(instring,loc,doActions,callPreParse=False)
                    if mat:
                        skipRes = ParseResults( skipText )
                        skipRes += mat
                        return loc, [ skipRes ]
                    else:
                        return loc, [ skipText ]
                else:
                    return loc, [ skipText ]
            except (ParseException,IndexError):
                if failParse:
                    raise
                else:
                    loc += 1
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
2888
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       Once the expression is known, assign it to the Forward variable with the '<<' operator.

       Note: be careful about operator precedence when assigning to a Forward.
       '|' binds less tightly than '<<', so::
          fwdExpr << a | b | c
       is actually evaluated as::
          (fwdExpr << a) | b | c
       which leaves b and c out as parseable alternatives.  Always group the
       values inserted into the Forward explicitly::
          fwdExpr << (a | b | c)
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Install other (a string becomes a Literal) as the wrapped expression.

        Copies the parsing-related settings of the new expression onto this
        Forward and returns None.
        """
        if isinstance( other, basestring ):
            other = Literal(other)
        self.expr = other
        target = self.expr
        self.strRepr = None     # cached string form is now stale
        self.mayIndexError = target.mayIndexError
        self.mayReturnEmpty = target.mayReturnEmpty
        self.setWhitespaceChars( target.whiteChars )
        self.skipWhitespace = target.skipWhitespace
        self.saveAsList = target.saveAsList
        self.ignoreExprs.extend(target.ignoreExprs)
        return None

    def leaveWhitespace( self ):
        """Turn off whitespace skipping on this element only; returns self."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the wrapped expression once; returns self."""
        if self.streamlined:
            return self
        # flag first, so a recursive grammar does not loop back through here
        self.streamlined = True
        if self.expr is not None:
            self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression (unless self is already in the trace), then check recursion."""
        if self not in validateTrace and self.expr is not None:
            self.expr.validate( validateTrace[:]+[self] )
        self.checkRecursion([])

    def __str__( self ):
        """Return the assigned name, or "<ClassName>: <wrapped expr>".

        While the wrapped expression is being rendered, this object's class
        is temporarily switched to _ForwardNoRecurse so that a reference
        back to self prints as "..." instead of recursing.
        """
        if hasattr(self,"name"):
            return self.name

        self._revertClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is None:
                inner = "None"
            else:
                inner = _ustr(self.expr)
        finally:
            self.__class__ = self._revertClass
        return self.__class__.__name__ + ": " + inner

    def copy(self):
        """Copy a defined Forward normally; for an undefined one, return a new Forward wrapping self."""
        if self.expr is None:
            wrapper = Forward()
            wrapper << self
            return wrapper
        return super(Forward,self).copy()
2960
class _ForwardNoRecurse(Forward):
    """Stand-in class that Forward.__str__ switches to temporarily; always prints as "..."."""

    def __str__( self ):
        return "..."
2964
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseElementEnhance, for converting parsed results."""

    def __init__( self, expr, savelist=False ):
        # savelist is accepted for signature compatibility but is not forwarded
        super(TokenConverter,self).__init__( expr )
        self.saveAsList = False
2970
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens (deprecated)."""

    def __init__(self, *args):
        super(Upcase,self).__init__(*args)
        warnings.warn(
            "Upcase class is deprecated, use upcaseTokens parse action instead",
            DeprecationWarning,
            stacklevel=2)

    def postParse( self, instring, loc, tokenlist ):
        """Return a new list holding the upper-cased form of every token."""
        return [ string.upper(tok) for tok in tokenlist ]
2980
2981
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
       The matched pieces must be contiguous in the input string by default;
       pass 'adjacent=False' to the constructor to lift that requirement.
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine,self).__init__( expr )
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString

    def ignore( self, other ):
        """Register an ignorable expression; returns self.

        An adjacent Combine calls ParserElement.ignore directly; otherwise
        the inherited ignore is used.
        """
        if not self.adjacent:
            super( Combine, self).ignore( other )
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        """Replace the matched tokens with their single joined string."""
        joined = "".join(tokenlist._asStringList(self.joinString))
        # start from a copy with its list items cleared, then add the joined token
        retToks = tokenlist.copy()
        del retToks[:]
        retToks += ParseResults([ joined ], modal=self.modalResults)

        if self.resultsName and len(retToks.keys())>0:
            return [ retToks ]
        return retToks
3012
class Group(TokenConverter):
    """Converter that returns the matched tokens as one nested list - handy for keeping
       the tokens of ZeroOrMore and OneOrMore expressions together."""

    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Wrap the whole token list in a one-element list."""
        grouped = [ tokenlist ]
        return grouped
3021
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Every element can additionally be looked up by the first token of that element.
       Useful for tabular report scraping when the first column can serve as an item key.
    """
    def __init__( self, exprs ):
        super(Dict,self).__init__( exprs )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Add a named entry to tokenlist for every non-empty element.

        The key is the element's first token (an int key is converted to a
        stripped string).  The value is "" for a one-token element, the
        second token for a two-token element whose second token is not a
        ParseResults, and otherwise the remaining tokens - unwrapped when
        exactly one remains and the remainder carries no named results.
        """
        for idx,entry in enumerate(tokenlist):
            entryLen = len(entry)
            if entryLen == 0:
                continue

            key = entry[0]
            if isinstance(key,int):
                key = _ustr(entry[0]).strip()

            if entryLen == 1:
                value = ""
            elif entryLen == 2 and not isinstance(entry[1],ParseResults):
                value = entry[1]
            else:
                rest = entry.copy()
                del rest[0]
                keepWhole = len(rest) != 1 or (isinstance(rest,ParseResults) and rest.keys())
                if keepWhole:
                    value = rest
                else:
                    value = rest[0]
            tokenlist[key] = _ParseResultsWithOffset(value,idx)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist
3054
3055
class Suppress(TokenConverter):
    """Converter that discards the results of a parsed expression."""

    def postParse( self, instring, loc, tokenlist ):
        """Throw the matched tokens away."""
        return []

    def suppress( self ):
        """Already suppressed - return self unchanged."""
        return self
3063
3064
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""

    def __init__(self, methodCall):
        self.callable = ParserElement._normalizeParseActionArgs(methodCall)
        self.called = False

    def __call__(self,s,l,t):
        """Run the wrapped action on first use; raise ParseException on any later call until reset()."""
        if self.called:
            raise ParseException(s,l,"")
        results = self.callable(s,l,t)
        # only marked as used once the wrapped action has returned normally
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped action to run one more time."""
        self.called = False
3078
def traceParseAction(f):
    """Decorator for debugging parse actions.

    The returned wrapper writes an ">>entering" line to sys.stderr before
    calling the parse action, and a "<<leaving" line afterwards showing
    either the return value or the exception (which is then re-raised).
    """
    f = ParserElement._normalizeParseActionArgs(f)
    def z(*paArgs):
        # __name__ exists on functions under both Python 2 and 3
        # (func_name is Python 2 only); fall back to repr for other callables
        thisFunc = getattr(f, "__name__", None) or repr(f)
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            # an extra leading argument is taken to be the bound instance
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception:
            # sys.exc_info() is valid syntax on every Python version,
            # unlike "except Exception, exc"
            exc = sys.exc_info()[1]
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
3100
3101 #
3102 # global helpers
3103 #
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       Whitespace and comments may appear between list elements and delimiters by
       default; pass 'combine=True' to change that.  With combine set to True the
       matching tokens come back as one single token string with the delimiters
       included; otherwise they come back as a list of tokens with the delimiters
       suppressed.
    """
    exprName = _ustr(expr)
    dlName = "%s [%s %s]..." % (exprName, _ustr(delim), exprName)
    if not combine:
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    else:
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    return listExpr.setName(dlName)
3117
def countedArray( expr ):
    """Helper to define a counted list of expressions.
       The pattern defined has the form::
           integer expr expr expr...
       where the leading integer says how many expr expressions follow.
       The matched tokens are the array of expr tokens as a list - the leading count token is suppressed.
    """
    arrayExpr = Forward()

    def countFieldParseAction(s,l,t):
        # rebuild the array expression to expect exactly the announced count
        count = int(t[0])
        if count:
            body = Group(And([expr]*count))
        else:
            body = Group(empty)
        arrayExpr << body
        return []

    countField = Word(nums).setName("arrayLen")
    countField = countField.setParseAction(countFieldParseAction, callDuringTry=True)
    return ( countField + arrayExpr )
3131
3132 def _flatten(L):
3133 if type(L) is not list: return [L]
3134 if L == []: return L
3135 return _flatten(L[0]) + _flatten(L[1:])
3136
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, i.e. it looks for
       a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Since the repeat is matched as a
       literal, this also matches the leading "1:1" in "1:10".
       Use matchPreviousExpr if that is not wanted.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s,l,t):
        # re-point the repeater at whatever the previous expression just matched
        if not t:
            rep << Empty()
            return
        if len(t) == 1:
            rep << t[0]
            return
        # several (possibly nested) tokens: require each one in sequence
        flatTokens = _flatten(t.asList())
        rep << And( [ Literal(tok) for tok in flatTokens ] )

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
3162
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, i.e. it looks for
       a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Since the match is done by
       expression, this will *not* match the leading "1:1" in "1:10";
       the expressions are evaluated first and compared afterwards, so
       "1" is compared with "10".
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    rep << expr.copy()

    def copyTokenToRepeater(s,l,t):
        expected = _flatten(t.asList())

        def mustMatchTheseTokens(s,l,t):
            # reject the repeat unless it produced exactly the same tokens
            found = _flatten(t.asList())
            if found != expected:
                raise ParseException("",0,"")

        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
3188
def _escapeRegexRangeChars(s):
    """Escape s for use inside a regex [] range: backslash-escape \\ ^ - ] and spell out newline and tab."""
    #~ escape these chars: ^-]
    # the backslash itself must be handled first, so later escapes are not doubled
    substitutions = [ (ch, _bslash+ch) for ch in r"\^-]" ]
    substitutions.append( ("\n", r"\n") )
    substitutions.append( ("\t", r"\t") )
    for plain, escaped in substitutions:
        s = s.replace(plain, escaped)
    return _ustr(s)
3196
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a MatchFirst for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a MatchFirst object (if caseless=True, or
          if creating a Regex raises an exception)

       An argument that is neither a string nor a list/tuple produces a
       SyntaxWarning and is treated as an empty set of alternatives.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        symbols = list(strs[:])
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # keep going with no alternatives rather than failing on an unbound name
        symbols = []

    # Drop duplicates, and move any symbol that is masked by an earlier,
    # shorter prefix (e.g. "<" before "<=") in front of that prefix.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing changed for this position; move on
            i += 1

    # an empty symbol list cannot form a valid pattern, so skip the Regex attempt
    if symbols and not caseless and useRegex:
        #~ print (strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] ))
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: use a character class
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
3255
def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary from the patterns for its
       keys and values.  Arranges the Dict, ZeroOrMore, and Group tokens in the
       proper order.  The key pattern may contain delimiting markers or punctuation,
       provided they are suppressed so that only the significant key text remains.
       The value pattern may contain named results, so the Dict results can carry
       named token fields.
    """
    entry = Group( key + value )
    return Dict( ZeroOrMore( entry ) )
3265
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
       revert separate tokens with intervening whitespace back to the original matching
       input text.  Simpler to use than the parse action keepOriginalText, and does not
       need the inspect module to walk up the call stack.  By default the result is a
       string containing the original parsed text.

       If asString is passed as False, the result is instead a ParseResults holding
       any results names that were originally matched plus one token with the
       original matched text.  So if expr contains expressions with results names,
       pass asString=False to keep those results name values."""
    # zero-width markers that record the location just before and just after expr
    locMarker = Empty().setParseAction(lambda s,loc,t: loc)
    matchExpr = locMarker("_original_start") + expr + locMarker("_original_end")

    if not asString:
        def extractText(s,l,t):
            # replace the token list in place, then drop the marker names
            del t[:]
            t.insert(0, s[t._original_start:t._original_end])
            del t["_original_start"]
            del t["_original_end"]
    else:
        def extractText(s,l,t):
            return s[t._original_start:t._original_end]

    matchExpr.setParseAction(extractText)
    return matchExpr
3292
# convenience constants for positional expressions
empty       = Empty().setName("empty")              # instance of Empty
lineStart   = LineStart().setName("lineStart")      # instance of LineStart
lineEnd     = LineEnd().setName("lineEnd")          # instance of LineEnd
stringStart = StringStart().setName("stringStart")  # instance of StringStart
stringEnd   = StringEnd().setName("stringEnd")      # instance of StringEnd
3299
# --- private grammar for the "[...]" range strings accepted by srange() ---

# backslash followed by one punctuation character; yields that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(
    lambda s,l,t:t[0][1])
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
# "\0x" + hex digits; yields the character with that code
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(
    lambda s,l,t:unichr(int(t[0],16)))
# "\" + octal digits with a leading 0; yields the character with that code
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(
    lambda s,l,t:unichr(int(t[0],8)))
_singleChar = ( _escapedPunc
              | _escapedHexChar
              | _escapedOctChar
              | Word(_printables_less_backslash,exact=1) )
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = ( Literal("[")
                 + Optional("^").setResultsName("negate")
                 + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body")
                 + "]" )

def _expanded(p):
    """Expand a parsed (first, last) range into its run of characters; pass anything else through."""
    if isinstance(p,ParseResults):
        run = ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ])
        # an empty run (first > last) falls through and returns p itself
        if run:
            return run
    return p
3309
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
       If the string cannot be parsed or expanded, an empty string is returned.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # best effort: a malformed range yields "", but KeyboardInterrupt and
        # SystemExit are no longer swallowed as they were by a bare except
        return ""
3330
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that only accept a match
       located at column n of the input text.
    """
    def verifyCol(strg,locn,toks):
        if col(locn,strg) == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
3339
def replaceWith(replStr):
    """Helper method for common parse actions that just return a fixed literal value.
       Particularly handy together with transformString().
    """
    def _replFunc(*args):
        # whatever was matched is ignored; a new one-item list is built per call
        replacement = [replStr]
        return replacement
    return _replFunc
3347
def removeQuotes(s,l,t):
    """Helper parse action that strips the first and last character (the
       quotation marks) from a parsed quoted string.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )
    """
    quoted = t[0]
    return quoted[1:-1]
3354
def upcaseTokens(s,l,t):
    """Helper parse action to convert tokens to upper case."""
    return [ _ustr(tok).upper() for tok in t ]
3358
def downcaseTokens(s,l,t):
    """Helper parse action to convert tokens to lower case."""
    return [ _ustr(tok).lower() for tok in t ]
3362
def keepOriginalText(s,startLoc,t):
    """Helper parse action to preserve original parsed text,
       overriding any nested parse actions.

       Replaces the contents of t with the slice of s from startLoc up to
       the end location reported by getTokensEndLoc(), and returns t."""
    try:
        endloc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    originalText = s[startLoc:endloc]
    del t[:]
    t += ParseResults(originalText)
    return t
3373
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens.

       Walks up the call stack, starting two frames above this function,
       to the nearest frame of a function named "_parseNoCache" and
       returns that frame's local variable "loc".  Raises
       ParseFatalException if there is no such frame."""
    import inspect
    fstack = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers) for correct calling routine
        for record in fstack[2:]:
            frame, funcName = record[0], record[3]
            if funcName == "_parseNoCache":
                return frame.f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the reference to the captured frame records
        del fstack
3389
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

    tagStr may be a string (turned into a Keyword, caseless unless xml is
    true) or an expression that has a name.  Returns (openTag, closeTag).
    """
    if isinstance(tagStr,basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    # optional "/" before ">"; stored under "empty" as a boolean
    selfClosing = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')

    if (xml):
        # XML: every attribute needs a double-quoted value
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        attribute = Group( tagAttrName + Suppress("=") + tagAttrValue )
    else:
        # HTML: names are lower-cased, values may be unquoted or missing
        printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        attribute = Group( tagAttrName.setParseAction(downcaseTokens) + \
                Optional( Suppress("=") + tagAttrValue ) )

    openTag = Suppress("<") + tagStr + Dict(ZeroOrMore(attribute)) + selfClosing + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names are "start"/"end" + the tag name in title case, with ':' removed
    nameSuffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+nameSuffix).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+nameSuffix).setName("</%s>" % tagStr)

    return openTag, closeTag
3417
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name"""
    htmlTags = _makeTags( tagStr, xml=False )
    return htmlTags
3421
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given a tag name"""
    xmlTags = _makeTags( tagStr, xml=True )
    return xmlTags
3425
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with makeXMLTags or makeHTMLTags.  Use withAttribute to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       <TD> or <DIV>.

       Call withAttribute with a series of attribute names and values. Give the
       filter attribute names and values as either:
        - keyword arguments, as in (class="Customer",align="right"), or
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       Attribute names carrying a namespace prefix require the second form.  Attribute
       names are matched insensitive to upper/lower case.

       To check only that an attribute exists, whatever its value, pass
       withAttribute.ANY_VALUE as the value.
    """
    # positional (name, value) pairs take precedence over keyword arguments
    required = [ (name,value) for name,value in (args or attrDict.items()) ]

    def pa(s,l,tokens):
        for attrName,attrValue in required:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                actual = tokens[attrName]
                if actual != attrValue:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, actual, attrValue))
    return pa

# unique marker meaning "the attribute must be present, with any value"
withAttribute.ANY_VALUE = object()
3456
# associativity markers passed as the third member of each opList tuple
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()

def operatorPrecedence( baseExpr, opList ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic operand of the
          expression grammar
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants opAssoc.RIGHT and opAssoc.LEFT.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)

       Raises ValueError for a malformed ternary opExpr, an unknown
       associativity, or a numTerms other than 1, 2 or 3.
    """
    ret = Forward()
    lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    for operDef in opList:
        # pad with None so the parse action member may be left out
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        thisExpr = Forward()#.setName("expr%d" % i)

        # Left-associative levels repeat the next-higher level on the right of
        # the operator; right-associative levels recurse into this same level.
        if rightLeftAssoc == opAssoc.LEFT:
            isLeft = True
            rhs = lastExpr
        elif rightLeftAssoc == opAssoc.RIGHT:
            isLeft = False
            rhs = thisExpr
        else:
            raise ValueError("operator must indicate right or left associativity")

        if arity == 1:
            if isLeft:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            else:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
        elif arity == 2:
            if opExpr is None:
                # no explicit operator: operands simply follow one another
                matchExpr = FollowedBy(lastExpr + rhs) + Group( lastExpr + OneOrMore( rhs ) )
            else:
                matchExpr = FollowedBy(lastExpr + opExpr + rhs) + Group( lastExpr + OneOrMore( opExpr + rhs ) )
        elif arity == 3:
            matchExpr = FollowedBy(lastExpr + opExpr1 + rhs + opExpr2 + rhs) + \
                        Group( lastExpr + opExpr1 + rhs + opExpr2 + rhs )
        else:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if pa:
            matchExpr.setParseAction( pa )
        thisExpr << ( matchExpr | lastExpr )
        lastExpr = thisExpr
    ret << lastExpr
    return ret
3531
# Quoted-string expressions.  Inside the quotes each pattern accepts a character
# that is not the quote, CR, LF or backslash; a doubled quote; \x + hex digits;
# or a backslash followed by any character.
dblQuotedString = Regex(
    r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"'
    ).setName("string enclosed in double quotes")
sglQuotedString = Regex(
    r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'"
    ).setName("string enclosed in single quotes")
quotedString = Regex(
    r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')'''
    ).setName("quotedString using single or double quotes")
# a quoted string with a leading 'u', combined into one token
unicodeString = Combine(_L('u') + quotedString.copy())
3536
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       When no content expression is given, the nested expression captures
       all whitespace-delimited content between the delimiters as a list of
       separate values; opener and closer must then both be strings.

       Use ignoreExpr for expressions that may contain opening or closing
       characters which should not count as nesting delimiters, such as
       quotedString or a comment expression.  Several expressions can be
       given as an Or or MatchFirst.  The default is quotedString; pass
       None if nothing is to be ignored.

       Raises ValueError if opener equals closer, or if content is omitted
       and opener/closer are not both strings.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        stripToken = lambda t:t[0].strip()
        singleCharDelims = len(opener) == 1 and len(closer)==1

        if singleCharDelims and ignoreExpr is not None:
            # one character at a time, so ignoreExpr is re-checked at every position
            content = (Combine(OneOrMore(~ignoreExpr +
                            CharsNotIn(opener+closer+whiteChars,exact=1))
                        ).setParseAction(stripToken))
        elif singleCharDelims:
            content = (empty+CharsNotIn(opener+closer+whiteChars
                        ).setParseAction(stripToken))
        elif ignoreExpr is not None:
            # multi-character delimiters need negative lookaheads instead of an excluded-char set
            content = (Combine(OneOrMore(~ignoreExpr +
                            ~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whiteChars,exact=1))
                        ).setParseAction(stripToken))
        else:
            content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whiteChars,exact=1))
                        ).setParseAction(stripToken))

    ret = Forward()
    if ignoreExpr is None:
        items = ZeroOrMore( ret | content )
    else:
        items = ZeroOrMore( ignoreExpr | ret | content )
    ret << Group( Suppress(opener) + items + Suppress(closer) )
    return ret
3588
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper that builds an expression for a block of statements delimited
       by indentation, such as the block statements of Python source code.

       Parameters:
        - blockStatementExpr - expression for one statement; it is matched
            one or more times inside the block
        - indentStack - list created by the caller that holds the indentation
            columns in effect; the parse actions read indentStack[-1], so it
            must not be empty, and they push and pop entries while parsing
            (multiple statementWithIndentedBlock expressions within a single
            grammar should share a common indentStack)
        - indent - True if the block must be indented beyond the current
            level; False for a block of left-most statements (default=True)

       A valid block must contain at least one blockStatement.
    """
    def checkPeerIndent(s,l,t):
        # nothing to verify once the end of the input has been reached
        if l < len(s):
            curCol = col(l,s)
            expected = indentStack[-1]
            if curCol > expected:
                raise ParseFatalException(s,l,"illegal nesting")
            if curCol != expected:
                raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        curCol = col(l,s)
        if curCol <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        # deeper than the current level: this column becomes the new level
        indentStack.append( curCol )

    def checkUnindent(s,l,t):
        # at the end of the input the stack is left as it is
        if l < len(s):
            curCol = col(l,s)
            if (not indentStack
                    or curCol >= indentStack[-1]
                    or curCol > indentStack[-2]):
                raise ParseException(s,l,"not an unindent")
            indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)

    # one or more statements, each starting in the current indentation column
    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        body = (Optional(NL) +
                FollowedBy(blockStatementExpr) +
                INDENT + statements + UNDENT)
    else:
        body = Optional(NL) + statements
    smExpr = Group( body )

    # registered only after the block expression has been assembled
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr
3640
# character-set strings for the upper half of the 8-bit range, built by srange
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# open/close tag expressions for any tag whose name is a Word starting with
# a character from alphas and continuing with alphanums, "_" or ":"
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))

# "&name;" for a small set of entity names; the name is stored under the
# results name "entity" so that replaceHTMLEntity can look it up
commonHTMLEntity = Combine(
    _L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") + ";"
    ).streamline()
_htmlEntityMap = {
    "gt"   : ">",
    "lt"   : "<",
    "amp"  : "&",
    "nbsp" : " ",
    "quot" : '"',
    }
# returns the character for a known entity name, or None for anything else
replaceHTMLEntity = lambda t : _htmlEntityMap.get(t.entity)

# it's easy to get these comment structures wrong - they're very common,
# so may as well make them available
cStyleComment = Regex(
    r"/\*(?:[^*]*\*+)+?/"
    ).setName("C style comment")

htmlComment = Regex(r"<!--[\s\S]*?-->")
restOfLine = Regex(r".*").leaveWhitespace()
dblSlashComment = Regex(
    r"\/\/(\\\n|.)*"
    ).setName("// comment")
cppStyleComment = Regex(
    r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))"
    ).setName("C++ style comment")

# Java comments are matched by the very same expression object
javaStyleComment = cppStyleComment
pythonStyleComment = Regex(
    r"#.*"
    ).setName("Python style comment")

# every printable character except the comma
_noncomma = "".join( [ ch for ch in printables if ch != "," ] )
# an unquoted list item: runs of non-comma characters, optionally followed by
# blanks/tabs that are not directly followed by a comma or the end of the line
_commasepitem = Combine(
    OneOrMore( Word(_noncomma) +
               Optional( Word(" \t") + ~Literal(",") + ~LineEnd() ) )
    ).streamline().setName("commaItem")
# items may be quoted strings or unquoted items; a missing item yields ""
commaSeparatedList = delimitedList(
    Optional( quotedString | _commasepitem, default="" )
    ).setName("commaSeparatedList")
3664
3665
if __name__ == "__main__":
    # Small self-test: parse a few SELECT statements with a toy SQL grammar
    # and print either the parsed tokens or the location of the parse error.
    import sys

    def test( teststring ):
        """Parse teststring with simpleSQL and print the outcome.

           On success the token list, the "columns" and "tables" results and
           an XML dump are printed.  On a ParseBaseException the offending
           line is printed with a caret under the error column, followed by
           the exception itself.  A blank line is printed after either case.
        """
        try:
            tokens = simpleSQL.parseString( teststring )
            tokenlist = tokens.asList()
            print (teststring + "->" + str(tokenlist))
            print ("tokens = " + str(tokens))
            print ("tokens.columns = " + str(tokens.columns))
            print ("tokens.tables = " + str(tokens.tables))
            print (tokens.asXML("SQL",True))
        except ParseBaseException:
            # The exception is fetched through sys.exc_info() because the
            # "except X, err" spelling does not parse on Python 3, while the
            # parenthesised prints here are written to run on both 2 and 3.
            err = sys.exc_info()[1]
            print (teststring + "->")
            print (err.line)
            print (" "*(err.column-1) + "^")
            print (err)
        # An explicit empty string gives a blank line on Python 2 and 3 alike;
        # a bare "print()" is a print statement applied to an empty tuple on
        # Python 2 and would output "()".
        print ("")

    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    ident = Word( alphas, alphanums + "_$" )
    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )
    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList = Group( delimitedList( tableName ) )
    # the parentheses already continue the expression across lines, so no
    # backslash continuations are needed
    simpleSQL = ( selectToken +
                  ( '*' | columnNameList ).setResultsName( "columns" ) +
                  fromToken +
                  tableNameList.setResultsName( "tables" ) )

    # well-formed statements mixed with deliberately malformed ones
    for sample in (
            "SELECT * from XYZZY, ABC",
            "select * from SYS.XYZZY",
            "Select A from Sys.dual",
            "Select AA,BB,CC from Sys.dual",
            "Select A, B, C from Sys.dual",
            "Select A, B, C from Sys.dual",
            "Xelect A, B, C from Sys.dual",
            "Select A, B, C frox Sys.dual",
            "Select",
            "Select ^^^ frox Sys.dual",
            "Select A, B, C from Sys.dual, Table2 ",
            ):
        test( sample )
General Comments 0
You need to be logged in to leave comments. Login now