. Call withAttribute with a series of attribute names and values. Specify the list of filter attributes names and values as: - keyword arguments, as in (class="Customer",align="right"), or - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") ) For attribute names with a namespace prefix, you must use the second form. Attribute names are matched insensitive to upper/lower case. To verify that the attribute exists, but without specifying a value, pass withAttribute.ANY_VALUE as the value. """ if args: attrs = args[:] else: attrs = attrDict.iteritems() attrs = [(k,v) for k,v in attrs] def pa(s,l,tokens): for attrName,attrValue in attrs: if attrName not in tokens: raise ParseException(s,l,"no matching attribute " + attrName) if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue: raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" % (attrName, tokens[attrName], attrValue)) return pa withAttribute.ANY_VALUE = object() opAssoc = _Constants() opAssoc.LEFT = object() opAssoc.RIGHT = object() def operatorPrecedence( baseExpr, opList ): """Helper method for constructing grammars of expressions made up of operators working in a precedence hierarchy. Operators may be unary or binary, left- or right-associative. Parse actions can also be attached to operator expressions. Parameters: - baseExpr - expression representing the most basic element for the nested - opList - list of tuples, one for each operator precedence level in the expression grammar; each tuple is of the form (opExpr, numTerms, rightLeftAssoc, parseAction), where: - opExpr is the pyparsing expression for the operator; may also be a string, which will be converted to a Literal; if numTerms is 3, opExpr is a tuple of two expressions, for the two operators separating the 3 terms - numTerms is the number of terms for this operator (must be 1, 2, or 3) - rightLeftAssoc is the indicator whether the operator is right or left associative, using the pyparsing-defined constants opAssoc.RIGHT and opAssoc.LEFT. - parseAction is the parse action to be associated with expressions matching this operator expression (the parse action tuple member may be omitted) """ ret = Forward() lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') ) for i,operDef in enumerate(opList): opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4] if arity == 3: if opExpr is None or len(opExpr) != 2: raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions") opExpr1, opExpr2 = opExpr thisExpr = Forward()#.setName("expr%d" % i) if rightLeftAssoc == opAssoc.LEFT: if arity == 1: matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) ) elif arity == 2: if opExpr is not None: matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) ) else: matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) ) elif arity == 3: matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \ Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr ) else: raise ValueError("operator must be unary (1), binary (2), or ternary (3)") elif rightLeftAssoc == opAssoc.RIGHT: if arity == 1: # try to avoid LR with this extra test if not isinstance(opExpr, Optional): opExpr = Optional(opExpr) matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr ) elif arity == 2: if opExpr is not None: matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) ) else: matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) ) elif arity == 3: matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \ Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr ) else: raise ValueError("operator must be unary (1), binary (2), or ternary (3)") else: raise ValueError("operator must indicate right or left associativity") if pa: matchExpr.setParseAction( pa ) thisExpr << ( matchExpr | lastExpr ) lastExpr = thisExpr ret << lastExpr return ret dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes") sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes") quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes") unicodeString = Combine(_L('u') + quotedString.copy()) def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString): """Helper method for defining nested lists enclosed in opening and closing delimiters ("(" and ")" are the default). Parameters: - opener - opening character for a nested list (default="("); can also be a pyparsing expression - closer - closing character for a nested list (default=")"); can also be a pyparsing expression - content - expression for items within the nested lists (default=None) - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString) If an expression is not provided for the content argument, the nested expression will capture all whitespace-delimited content between delimiters as a list of separate values. Use the ignoreExpr argument to define expressions that may contain opening or closing characters that should not be treated as opening or closing characters for nesting, such as quotedString or a comment expression. Specify multiple expressions using an Or or MatchFirst. The default is quotedString, but if no expressions are to be ignored, then pass None for this argument. """ if opener == closer: raise ValueError("opening and closing strings cannot be the same") if content is None: if isinstance(opener,basestring) and isinstance(closer,basestring): if len(opener) == 1 and len(closer)==1: if ignoreExpr is not None: content = (Combine(OneOrMore(~ignoreExpr + CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1)) ).setParseAction(lambda t:t[0].strip())) else: content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS ).setParseAction(lambda t:t[0].strip())) else: if ignoreExpr is not None: content = (Combine(OneOrMore(~ignoreExpr + ~Literal(opener) + ~Literal(closer) + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) ).setParseAction(lambda t:t[0].strip())) else: content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1)) ).setParseAction(lambda t:t[0].strip())) else: raise ValueError("opening and closing arguments must be strings if no content expression is given") ret = Forward() if ignoreExpr is not None: ret << Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) ) else: ret << Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) ) return ret def indentedBlock(blockStatementExpr, indentStack, indent=True): """Helper method for defining space-delimited indentation blocks, such as those used to define block statements in Python source code. Parameters: - blockStatementExpr - expression defining syntax of statement that is repeated within the indented block - indentStack - list created by caller to manage indentation stack (multiple statementWithIndentedBlock expressions within a single grammar should share a common indentStack) - indent - boolean indicating whether block must be indented beyond the the current level; set to False for block of left-most statements (default=True) A valid block must contain at least one blockStatement. """ def checkPeerIndent(s,l,t): if l >= len(s): return curCol = col(l,s) if curCol != indentStack[-1]: if curCol > indentStack[-1]: raise ParseFatalException(s,l,"illegal nesting") raise ParseException(s,l,"not a peer entry") def checkSubIndent(s,l,t): curCol = col(l,s) if curCol > indentStack[-1]: indentStack.append( curCol ) else: raise ParseException(s,l,"not a subentry") def checkUnindent(s,l,t): if l >= len(s): return curCol = col(l,s) if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]): raise ParseException(s,l,"not an unindent") indentStack.pop() NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress()) INDENT = Empty() + Empty().setParseAction(checkSubIndent) PEER = Empty().setParseAction(checkPeerIndent) UNDENT = Empty().setParseAction(checkUnindent) if indent: smExpr = Group( Optional(NL) + FollowedBy(blockStatementExpr) + INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT) else: smExpr = Group( Optional(NL) + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) ) blockStatementExpr.ignore(_bslash + LineEnd()) return smExpr alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]") punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]") anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:")) commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline() _htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "')) replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None # it's easy to get these comment structures wrong - they're very common, so may as well make them available cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment") htmlComment = Regex(r"") restOfLine = Regex(r".*").leaveWhitespace() dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment") cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?" + str(tokenlist)) print ("tokens = " + str(tokens)) print ("tokens.columns = " + str(tokens.columns)) print ("tokens.tables = " + str(tokens.tables)) print (tokens.asXML("SQL",True)) except ParseBaseException,err: print (teststring + "->") print (err.line) print (" "*(err.column-1) + "^") print (err) print() selectToken = CaselessLiteral( "select" ) fromToken = CaselessLiteral( "from" ) ident = Word( alphas, alphanums + "_$" ) columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens ) columnNameList = Group( delimitedList( columnName ) )#.setName("columns") tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens ) tableNameList = Group( delimitedList( tableName ) )#.setName("tables") simpleSQL = ( selectToken + \ ( '*' | columnNameList ).setResultsName( "columns" ) + \ fromToken + \ tableNameList.setResultsName( "tables" ) ) test( "SELECT * from XYZZY, ABC" ) test( "select * from SYS.XYZZY" ) test( "Select A from Sys.dual" ) test( "Select AA,BB,CC from Sys.dual" ) test( "Select A, B, C from Sys.dual" ) test( "Select A, B, C from Sys.dual" ) test( "Xelect A, B, C from Sys.dual" ) test( "Select A, B, C frox Sys.dual" ) test( "Select" ) test( "Select ^^^ frox Sys.dual" ) test( "Select A, B, C from Sys.dual, Table2 " )