Paul McGuire
2/5/2008 3:27:00 PM
On Feb 5, 8:31 am, c...@aol.com wrote:
> Hello again -
>
> I do not seem to be able to get a handle on non-greedy pattern
> matching.
>
Regexps wont cut it when you have to parse nested ()'s in a logical
expression.
Here is a pyparsing solution. For your specific application, you will
need to sift through WHERE clauses too, you should be able to use this
as a starting point for creating a WHERE clause parser.
-- Paul
from pyparsing import *
# define terminal items
identifier = Word(alphas,alphanums+'_')
column_name = Combine( identifier + ZeroOrMore('.' + identifier) )
integer = Word(nums)
value = integer | sglQuotedString
relationalOperator = oneOf("< = > >= <= != <>")
simple_comparison = Group(column_name + relationalOperator +
(column_name | value))
AND_cl = CaselessLiteral("AND")
OR_cl = CaselessLiteral("OR")
NOT_cl = CaselessLiteral("NOT")
logical_expression = operatorPrecedence( simple_comparison,
[
(NOT_cl, 1, opAssoc.RIGHT),
(OR_cl, 2, opAssoc.LEFT),
(AND_cl, 2, opAssoc.LEFT),
])
# syntax for FROM ... join expression
FROM_cl = CaselessLiteral("FROM")
ON_cl = CaselessLiteral("ON")
INNER_cl = CaselessLiteral("INNER")
JOIN_cl = CaselessLiteral("JOIN")
join_expr = identifier('table1') + INNER_cl + JOIN_cl + identifier('table2') + ON_cl + logical_expression('join_condition')
from_clause = FROM_cl + '(' + join_expr + ')'
test = """ FROM (qry_Scores_Lookup1 INNER JOIN CSS_Rpt1 ON
(qry_Scores_Lookup1.desc = CSS_Rpt1.desc) AND
(qry_Scores_Lookup1.lastcdu = CSS_Rpt1.lastcdu))"""
# parsing the FROM clause normally
join_fields = from_clause.parseString(test)
print "\nParsed tokens, plus any named fields"
print join_fields.dump()
print "\nAccessing some named fields directly"
print "table1 =", join_fields.table1
print "join_condition =", join_fields.join_condition
# create parse action to tally up column names as they are found
# during the parsing process
def tally_names(tokens):
column_names.append(tokens[0])
column_name.setParseAction(tally_names)
# parse the FROM clause, but just let the parse action do the work
print "\nExtracting the column names"
column_names = []
from_clause.parseString(test)
print column_names
Prints:
Parsed tokens, plus any named fields
['FROM', '(', 'qry_Scores_Lookup1', 'INNER', 'JOIN', 'CSS_Rpt1', 'ON',
[['qry_Scores_Lookup1.desc', '=', 'CSS_Rpt1.desc'], 'AND',
['qry_Scores_Lookup1.lastcdu', '=', 'CSS_Rpt1.lastcdu']], ')']
- join_condition: [['qry_Scores_Lookup1.desc', '=', 'CSS_Rpt1.desc'],
'AND', ['qry_Scores_Lookup1.lastcdu', '=', 'CSS_Rpt1.lastcdu']]
- table1: qry_Scores_Lookup1
- table2: CSS_Rpt1
Accessing some named fields directly
table1 = qry_Scores_Lookup1
join_condition = [['qry_Scores_Lookup1.desc', '=', 'CSS_Rpt1.desc'],
'AND', ['qry_Scores_Lookup1.lastcdu', '=', 'CSS_Rpt1.lastcdu']]
Extracting the column names
['qry_Scores_Lookup1.desc', 'CSS_Rpt1.desc',
'qry_Scores_Lookup1.lastcdu', 'CSS_Rpt1.lastcdu']