path: root/examples
author     Paul McGuire <ptmcg@austin.rr.com>    2019-07-13 13:36:36 -0500
committer  Paul McGuire <ptmcg@austin.rr.com>    2019-07-13 13:36:36 -0500
commit     5a566b59170fb3fe705a7691806c4afd158df520 (patch)
tree       7bf9c6077b3e0c03217b1c33055e1a7188cf5bcc /examples
parent     7d96e569a1b5f4505dac8d6f24c4b27562acf875 (diff)
download   pyparsing-git-5a566b59170fb3fe705a7691806c4afd158df520.tar.gz
Update/cleanup code in examples
Diffstat (limited to 'examples')
-rw-r--r--   examples/LAparser.py             | 876
-rw-r--r--   examples/commasep.py             |   7
-rw-r--r--   examples/eval_arith.py           | 456
-rw-r--r--   examples/httpServerLogParser.py  | 145
-rw-r--r--   examples/lucene_grammar.py       | 657
-rw-r--r--   examples/protobuf_parser.py      | 200
-rw-r--r--   examples/removeLineBreaks.py     |  90
7 files changed, 1244 insertions, 1187 deletions
diff --git a/examples/LAparser.py b/examples/LAparser.py
index 41e8b4f..330b8f5 100644
--- a/examples/LAparser.py
+++ b/examples/LAparser.py
@@ -1,414 +1,462 @@
-"""
-Purpose: Linear Algebra Parser
-Based on: SimpleCalc.py example (author Paul McGuire) in pyparsing-1.3.3
-Author: Mike Ellis
-Copyright: Ellis & Grant, Inc. 2005
-License: You may freely use, modify, and distribute this software.
-Warranty: THIS SOFTWARE HAS NO WARRANTY WHATSOEVER. USE AT YOUR OWN RISK.
-Notes: Parses infix linear algebra (LA) notation for vectors, matrices, and scalars.
- Output is C code function calls. The parser can be run as an interactive
- interpreter or included as module to use for in-place substitution into C files
- containing LA equations.
-
- Supported operations are:
- OPERATION: INPUT OUTPUT
- Scalar addition: "a = b+c" "a=(b+c)"
- Scalar subtraction: "a = b-c" "a=(b-c)"
- Scalar multiplication: "a = b*c" "a=b*c"
- Scalar division: "a = b/c" "a=b/c"
- Scalar exponentiation: "a = b^c" "a=pow(b,c)"
- Vector scaling: "V3_a = V3_b * c" "vCopy(a,vScale(b,c))"
- Vector addition: "V3_a = V3_b + V3_c" "vCopy(a,vAdd(b,c))"
- Vector subtraction: "V3_a = V3_b - V3_c" "vCopy(a,vSubtract(b,c))"
- Vector dot product: "a = V3_b * V3_c" "a=vDot(b,c)"
- Vector outer product: "M3_a = V3_b @ V3_c" "a=vOuterProduct(b,c)"
- Vector magn. squared: "a = V3_b^Mag2" "a=vMagnitude2(b)"
- Vector magnitude: "a = V3_b^Mag" "a=sqrt(vMagnitude2(b))"
- Matrix scaling: "M3_a = M3_b * c" "mCopy(a,mScale(b,c))"
- Matrix addition: "M3_a = M3_b + M3_c" "mCopy(a,mAdd(b,c))"
- Matrix subtraction: "M3_a = M3_b - M3_c" "mCopy(a,mSubtract(b,c))"
- Matrix multiplication: "M3_a = M3_b * M3_c" "mCopy(a,mMultiply(b,c))"
- Matrix by vector mult.: "V3_a = M3_b * V3_c" "vCopy(a,mvMultiply(b,c))"
- Matrix inversion: "M3_a = M3_b^-1" "mCopy(a,mInverse(b))"
- Matrix transpose: "M3_a = M3_b^T" "mCopy(a,mTranspose(b))"
- Matrix determinant: "a = M3_b^Det" "a=mDeterminant(b)"
-
- The parser requires the expression to be an equation. Each non-scalar variable
- must be prefixed with a type tag, 'M3_' for 3x3 matrices and 'V3_' for 3-vectors.
- For proper compilation of the C code, the variables need to be declared without
- the prefix as float[3] for vectors and float[3][3] for matrices. The operations do
- not modify any variables on the right-hand side of the equation.
-
- Equations may include nested expressions within parentheses. The allowed binary
- operators are '+-*/^' for scalars, and '+-*^@' for vectors and matrices with the
- meanings defined in the table above.
-
- Specifying an improper combination of operands, e.g. adding a vector to a matrix,
- is detected by the parser and results in a Python TypeError Exception. The usual cause
- of this is omitting one or more tag prefixes. The parser knows nothing about a
- a variable's C declaration and relies entirely on the type tags. Errors in C
- declarations are not caught until compile time.
-
-Usage: To process LA equations embedded in source files, import this module and
- pass input and output file objects to the fprocess() function. You can
- can also invoke the parser from the command line, e.g. 'python LAparser.py',
- to run a small test suite and enter an interactive loop where you can enter
- LA equations and see the resulting C code.
-
-"""
-
-import re,sys
-from pyparsing import Word, alphas, ParseException, Literal, CaselessLiteral \
-, Combine, Optional, nums, Forward, ZeroOrMore, \
- StringEnd, alphanums
-
-# Debugging flag can be set to either "debug_flag=True" or "debug_flag=False"
-debug_flag=False
-
-#----------------------------------------------------------------------------
-# Variables that hold intermediate parsing results and a couple of
-# helper functions.
-exprStack = [] # Holds operators and operands parsed from input.
-targetvar = None # Holds variable name to left of '=' sign in LA equation.
-
-
-def _pushFirst( str, loc, toks ):
- if debug_flag: print("pushing ", toks[0], "str is ", str)
- exprStack.append( toks[0] )
-
-def _assignVar( str, loc, toks ):
- global targetvar
- targetvar = toks[0]
-
-#-----------------------------------------------------------------------------
-# The following statements define the grammar for the parser.
-
-point = Literal('.')
-e = CaselessLiteral('E')
-plusorminus = Literal('+') | Literal('-')
-number = Word(nums)
-integer = Combine( Optional(plusorminus) + number )
-floatnumber = Combine( integer +
- Optional( point + Optional(number) ) +
- Optional( e + integer )
- )
-
-lbracket = Literal("[")
-rbracket = Literal("]")
-ident = Forward()
-## The definition below treats array accesses as identifiers. This means your expressions
-## can include references to array elements, rows and columns, e.g., a = b[i] + 5.
-## Expressions within []'s are not presently supported, so a = b[i+1] will raise
-## a ParseException.
-ident = Combine(Word(alphas + '-',alphanums + '_') + \
- ZeroOrMore(lbracket + (Word(alphas + '-',alphanums + '_')|integer) + rbracket) \
- )
-
-plus = Literal( "+" )
-minus = Literal( "-" )
-mult = Literal( "*" )
-div = Literal( "/" )
-outer = Literal( "@" )
-lpar = Literal( "(" ).suppress()
-rpar = Literal( ")" ).suppress()
-addop = plus | minus
-multop = mult | div | outer
-expop = Literal( "^" )
-assignop = Literal( "=" )
-
-expr = Forward()
-atom = ( ( e | floatnumber | integer | ident ).setParseAction(_pushFirst) |
- ( lpar + expr.suppress() + rpar )
- )
-factor = Forward()
-factor << atom + ZeroOrMore( ( expop + factor ).setParseAction( _pushFirst ) )
-
-term = factor + ZeroOrMore( ( multop + factor ).setParseAction( _pushFirst ) )
-expr << term + ZeroOrMore( ( addop + term ).setParseAction( _pushFirst ) )
-equation = (ident + assignop).setParseAction(_assignVar) + expr + StringEnd()
-
-# End of grammar definition
-#-----------------------------------------------------------------------------
-## The following are helper variables and functions used by the Binary Infix Operator
-## Functions described below.
-
-vprefix = 'V3_'
-vplen = len(vprefix)
-mprefix = 'M3_'
-mplen = len(mprefix)
-
-## We don't support unary negation for vectors and matrices
-class UnaryUnsupportedError(Exception): pass
-
-def _isvec(ident):
- if ident[0] == '-' and ident[1:vplen+1] == vprefix:
- raise UnaryUnsupportedError
- else: return ident[0:vplen] == vprefix
-
-def _ismat(ident):
- if ident[0] == '-' and ident[1:mplen+1] == mprefix:
- raise UnaryUnsupportedError
- else: return ident[0:mplen] == mprefix
-
-def _isscalar(ident): return not (_isvec(ident) or _ismat(ident))
-
-## Binary infix operator (BIO) functions. These are called when the stack evaluator
-## pops a binary operator like '+' or '*". The stack evaluator pops the two operand, a and b,
-## and calls the function that is mapped to the operator with a and b as arguments. Thus,
-## 'x + y' yields a call to addfunc(x,y). Each of the BIO functions checks the prefixes of its
-## arguments to determine whether the operand is scalar, vector, or matrix. This information
-## is used to generate appropriate C code. For scalars, this is essentially the input string, e.g.
-## 'a + b*5' as input yields 'a + b*5' as output. For vectors and matrices, the input is translated to
-## nested function calls, e.g. "V3_a + V3_b*5" yields "V3_vAdd(a,vScale(b,5)". Note that prefixes are
-## stripped from operands and function names within the argument list to the outer function and
-## the appropriate prefix is placed on the outer function for removal later as the stack evaluation
-## recurses toward the final assignment statement.
-
-def _addfunc(a,b):
- if _isscalar(a) and _isscalar(b): return "(%s+%s)"%(a,b)
- if _isvec(a) and _isvec(b): return "%svAdd(%s,%s)"%(vprefix,a[vplen:],b[vplen:])
- if _ismat(a) and _ismat(b): return "%smAdd(%s,%s)"%(mprefix,a[mplen:],b[mplen:])
- else: raise TypeError
-
-def _subfunc(a,b):
- if _isscalar(a) and _isscalar(b): return "(%s-%s)"%(a,b)
- if _isvec(a) and _isvec(b): return "%svSubtract(%s,%s)"%(vprefix,a[vplen:],b[vplen:])
- if _ismat(a) and _ismat(b): return "%smSubtract(%s,%s)"%(mprefix,a[mplen:],b[mplen:])
- else: raise TypeError
-
-def _mulfunc(a,b):
- if _isscalar(a) and _isscalar(b): return "%s*%s"%(a,b)
- if _isvec(a) and _isvec(b): return "vDot(%s,%s)"%(a[vplen:],b[vplen:])
- if _ismat(a) and _ismat(b): return "%smMultiply(%s,%s)"%(mprefix,a[mplen:],b[mplen:])
- if _ismat(a) and _isvec(b): return "%smvMultiply(%s,%s)"%(vprefix,a[mplen:],b[vplen:])
- if _ismat(a) and _isscalar(b): return "%smScale(%s,%s)"%(mprefix,a[mplen:],b)
- if _isvec(a) and _isscalar(b): return "%svScale(%s,%s)"%(vprefix,a[mplen:],b)
- else: raise TypeError
-
-def _outermulfunc(a,b):
- ## The '@' operator is used for the vector outer product.
- if _isvec(a) and _isvec(b):
- return "%svOuterProduct(%s,%s)"%(mprefix,a[vplen:],b[vplen:])
- else: raise TypeError
-
-def _divfunc(a,b):
- ## The '/' operator is used only for scalar division
- if _isscalar(a) and _isscalar(b): return "%s/%s"%(a,b)
- else: raise TypeError
-
-def _expfunc(a,b):
- ## The '^' operator is used for exponentiation on scalars and
- ## as a marker for unary operations on vectors and matrices.
- if _isscalar(a) and _isscalar(b): return "pow(%s,%s)"%(str(a),str(b))
- if _ismat(a) and b=='-1': return "%smInverse(%s)"%(mprefix,a[mplen:])
- if _ismat(a) and b=='T': return "%smTranspose(%s)"%(mprefix,a[mplen:])
- if _ismat(a) and b=='Det': return "mDeterminant(%s)"%(a[mplen:])
- if _isvec(a) and b=='Mag': return "sqrt(vMagnitude2(%s))"%(a[vplen:])
- if _isvec(a) and b=='Mag2': return "vMagnitude2(%s)"%(a[vplen:])
- else: raise TypeError
-
-def _assignfunc(a,b):
- ## The '=' operator is used for assignment
- if _isscalar(a) and _isscalar(b): return "%s=%s"%(a,b)
- if _isvec(a) and _isvec(b): return "vCopy(%s,%s)"%(a[vplen:],b[vplen:])
- if _ismat(a) and _ismat(b): return "mCopy(%s,%s)"%(a[mplen:],b[mplen:])
- else: raise TypeError
-
-## End of BIO func definitions
-##----------------------------------------------------------------------------
-
-# Map operator symbols to corresponding BIO funcs
-opn = { "+" : ( _addfunc ),
- "-" : ( _subfunc ),
- "*" : ( _mulfunc ),
- "@" : ( _outermulfunc ),
- "/" : ( _divfunc),
- "^" : ( _expfunc ), }
-
-
-##----------------------------------------------------------------------------
-# Recursive function that evaluates the expression stack
-def _evaluateStack( s ):
- op = s.pop()
- if op in "+-*/@^":
- op2 = _evaluateStack( s )
- op1 = _evaluateStack( s )
- result = opn[op]( op1, op2 )
- if debug_flag: print(result)
- return result
- else:
- return op
-
-##----------------------------------------------------------------------------
-# The parse function that invokes all of the above.
-def parse(input_string):
- """
- Accepts an input string containing an LA equation, e.g.,
- "M3_mymatrix = M3_anothermatrix^-1" returns C code function
- calls that implement the expression.
- """
-
- global exprStack
- global targetvar
-
- # Start with a blank exprStack and a blank targetvar
- exprStack = []
- targetvar=None
-
- if input_string != '':
- # try parsing the input string
- try:
- L=equation.parseString( input_string )
- except ParseException as err:
- print('Parse Failure', file=sys.stderr)
- print(err.line, file=sys.stderr)
- print(" "*(err.column-1) + "^", file=sys.stderr)
- print(err, file=sys.stderr)
- raise
-
- # show result of parsing the input string
- if debug_flag:
- print(input_string, "->", L)
- print("exprStack=", exprStack)
-
- # Evaluate the stack of parsed operands, emitting C code.
- try:
- result=_evaluateStack(exprStack)
- except TypeError:
- print("Unsupported operation on right side of '%s'.\nCheck for missing or incorrect tags on non-scalar operands."%input_string, file=sys.stderr)
- raise
- except UnaryUnsupportedError:
- print("Unary negation is not supported for vectors and matrices: '%s'"%input_string, file=sys.stderr)
- raise
-
- # Create final assignment and print it.
- if debug_flag: print("var=",targetvar)
- if targetvar != None:
- try:
- result = _assignfunc(targetvar,result)
- except TypeError:
- print("Left side tag does not match right side of '%s'"%input_string, file=sys.stderr)
- raise
- except UnaryUnsupportedError:
- print("Unary negation is not supported for vectors and matrices: '%s'"%input_string, file=sys.stderr)
- raise
-
- return result
- else:
- print("Empty left side in '%s'"%input_string, file=sys.stderr)
- raise TypeError
-
-##-----------------------------------------------------------------------------------
-def fprocess(infilep,outfilep):
- """
- Scans an input file for LA equations between double square brackets,
- e.g. [[ M3_mymatrix = M3_anothermatrix^-1 ]], and replaces the expression
- with a comment containing the equation followed by nested function calls
- that implement the equation as C code. A trailing semi-colon is appended.
- The equation within [[ ]] should NOT end with a semicolon as that will raise
- a ParseException. However, it is ok to have a semicolon after the right brackets.
-
- Other text in the file is unaltered.
-
- The arguments are file objects (NOT file names) opened for reading and
- writing, respectively.
- """
- pattern = r'\[\[\s*(.*?)\s*\]\]'
- eqn = re.compile(pattern,re.DOTALL)
- s = infilep.read()
- def parser(mo):
- ccode = parse(mo.group(1))
- return "/* %s */\n%s;\nLAParserBufferReset();\n"%(mo.group(1),ccode)
-
- content = eqn.sub(parser,s)
- outfilep.write(content)
-
-##-----------------------------------------------------------------------------------
-def test():
- """
- Tests the parsing of various supported expressions. Raises
- an AssertError if the output is not what is expected. Prints the
- input, expected output, and actual output for all tests.
- """
- print("Testing LAParser")
- testcases = [
- ("Scalar addition","a = b+c","a=(b+c)"),
- ("Vector addition","V3_a = V3_b + V3_c","vCopy(a,vAdd(b,c))"),
- ("Vector addition","V3_a=V3_b+V3_c","vCopy(a,vAdd(b,c))"),
- ("Matrix addition","M3_a = M3_b + M3_c","mCopy(a,mAdd(b,c))"),
- ("Matrix addition","M3_a=M3_b+M3_c","mCopy(a,mAdd(b,c))"),
- ("Scalar subtraction","a = b-c","a=(b-c)"),
- ("Vector subtraction","V3_a = V3_b - V3_c","vCopy(a,vSubtract(b,c))"),
- ("Matrix subtraction","M3_a = M3_b - M3_c","mCopy(a,mSubtract(b,c))"),
- ("Scalar multiplication","a = b*c","a=b*c"),
- ("Scalar division","a = b/c","a=b/c"),
- ("Vector multiplication (dot product)","a = V3_b * V3_c","a=vDot(b,c)"),
- ("Vector multiplication (outer product)","M3_a = V3_b @ V3_c","mCopy(a,vOuterProduct(b,c))"),
- ("Matrix multiplication","M3_a = M3_b * M3_c","mCopy(a,mMultiply(b,c))"),
- ("Vector scaling","V3_a = V3_b * c","vCopy(a,vScale(b,c))"),
- ("Matrix scaling","M3_a = M3_b * c","mCopy(a,mScale(b,c))"),
- ("Matrix by vector multiplication","V3_a = M3_b * V3_c","vCopy(a,mvMultiply(b,c))"),
- ("Scalar exponentiation","a = b^c","a=pow(b,c)"),
- ("Matrix inversion","M3_a = M3_b^-1","mCopy(a,mInverse(b))"),
- ("Matrix transpose","M3_a = M3_b^T","mCopy(a,mTranspose(b))"),
- ("Matrix determinant","a = M3_b^Det","a=mDeterminant(b)"),
- ("Vector magnitude squared","a = V3_b^Mag2","a=vMagnitude2(b)"),
- ("Vector magnitude","a = V3_b^Mag","a=sqrt(vMagnitude2(b))"),
- ("Complicated expression", "myscalar = (M3_amatrix * V3_bvector)^Mag + 5*(-xyz[i] + 2.03^2)","myscalar=(sqrt(vMagnitude2(mvMultiply(amatrix,bvector)))+5*(-xyz[i]+pow(2.03,2)))"),
- ("Complicated Multiline", "myscalar = \n(M3_amatrix * V3_bvector)^Mag +\n 5*(xyz + 2.03^2)","myscalar=(sqrt(vMagnitude2(mvMultiply(amatrix,bvector)))+5*(xyz+pow(2.03,2)))")
-
- ]
-
- for t in testcases:
- name,input,expected = t
- print(name)
- print(" %s input"%input)
- print(" %s expected"%expected)
- result = parse(input)
- print(" %s received"%result)
- print("")
- assert expected == result
-
- ##TODO: Write testcases with invalid expressions and test that the expected
- ## exceptions are raised.
-
- print("Tests completed!")
-##----------------------------------------------------------------------------
-## The following is executed only when this module is executed as
-## command line script. It runs a small test suite (see above)
-## and then enters an interactive loop where you
-## can enter expressions and see the resulting C code as output.
-
-if __name__ == '__main__':
- # run testcases
- test()
-
- # input_string
- input_string=''
-
- # Display instructions on how to use the program interactively
- interactiveusage = """
- Entering interactive mode:
- Type in an equation to be parsed or 'quit' to exit the program.
- Type 'debug on' to print parsing details as each string is processed.
- Type 'debug off' to stop printing parsing details
- """
- print(interactiveusage)
- input_string = input("> ")
-
- while input_string != 'quit':
- if input_string == "debug on":
- debug_flag = True
- elif input_string == "debug off":
- debug_flag = False
- else:
- try:
- print(parse(input_string))
- except Exception:
- pass
-
- # obtain new input string
- input_string = input("> ")
-
- # if user types 'quit' then say goodbye
- print("Good bye!")
+"""
+Purpose: Linear Algebra Parser
+Based on: SimpleCalc.py example (author Paul McGuire) in pyparsing-1.3.3
+Author: Mike Ellis
+Copyright: Ellis & Grant, Inc. 2005
+License: You may freely use, modify, and distribute this software.
+Warranty: THIS SOFTWARE HAS NO WARRANTY WHATSOEVER. USE AT YOUR OWN RISK.
+Notes: Parses infix linear algebra (LA) notation for vectors, matrices, and scalars.
+ Output is C code function calls. The parser can be run as an interactive
+ interpreter or included as a module for in-place substitution into C files
+ containing LA equations.
+
+ Supported operations are:
+ OPERATION: INPUT OUTPUT
+ Scalar addition: "a = b+c" "a=(b+c)"
+ Scalar subtraction: "a = b-c" "a=(b-c)"
+ Scalar multiplication: "a = b*c" "a=b*c"
+ Scalar division: "a = b/c" "a=b/c"
+ Scalar exponentiation: "a = b^c" "a=pow(b,c)"
+ Vector scaling: "V3_a = V3_b * c" "vCopy(a,vScale(b,c))"
+ Vector addition: "V3_a = V3_b + V3_c" "vCopy(a,vAdd(b,c))"
+ Vector subtraction: "V3_a = V3_b - V3_c" "vCopy(a,vSubtract(b,c))"
+ Vector dot product: "a = V3_b * V3_c" "a=vDot(b,c)"
+ Vector outer product: "M3_a = V3_b @ V3_c" "a=vOuterProduct(b,c)"
+ Vector magn. squared: "a = V3_b^Mag2" "a=vMagnitude2(b)"
+ Vector magnitude: "a = V3_b^Mag" "a=sqrt(vMagnitude2(b))"
+ Matrix scaling: "M3_a = M3_b * c" "mCopy(a,mScale(b,c))"
+ Matrix addition: "M3_a = M3_b + M3_c" "mCopy(a,mAdd(b,c))"
+ Matrix subtraction: "M3_a = M3_b - M3_c" "mCopy(a,mSubtract(b,c))"
+ Matrix multiplication: "M3_a = M3_b * M3_c" "mCopy(a,mMultiply(b,c))"
+ Matrix by vector mult.: "V3_a = M3_b * V3_c" "vCopy(a,mvMultiply(b,c))"
+ Matrix inversion: "M3_a = M3_b^-1" "mCopy(a,mInverse(b))"
+ Matrix transpose: "M3_a = M3_b^T" "mCopy(a,mTranspose(b))"
+ Matrix determinant: "a = M3_b^Det" "a=mDeterminant(b)"
+
+ The parser requires the expression to be an equation. Each non-scalar variable
+ must be prefixed with a type tag, 'M3_' for 3x3 matrices and 'V3_' for 3-vectors.
+ For proper compilation of the C code, the variables need to be declared without
+ the prefix as float[3] for vectors and float[3][3] for matrices. The operations do
+ not modify any variables on the right-hand side of the equation.
+
+ Equations may include nested expressions within parentheses. The allowed binary
+ operators are '+-*/^' for scalars, and '+-*^@' for vectors and matrices with the
+ meanings defined in the table above.
+
+ Specifying an improper combination of operands, e.g. adding a vector to a matrix,
+ is detected by the parser and results in a Python TypeError Exception. The usual cause
+ of this is omitting one or more tag prefixes. The parser knows nothing about a
+ variable's C declaration and relies entirely on the type tags. Errors in C
+ declarations are not caught until compile time.
+
+Usage: To process LA equations embedded in source files, import this module and
+ pass input and output file objects to the fprocess() function. You can
+ also invoke the parser from the command line, e.g. 'python LAparser.py',
+ to run a small test suite and enter an interactive loop where you can enter
+ LA equations and see the resulting C code.
+
+"""
+
+import re,sys
+from pyparsing import Word, alphas, ParseException, Literal, CaselessLiteral \
+, Combine, Optional, nums, Forward, ZeroOrMore, \
+ StringEnd, alphanums
+
+# Debugging flag can be set to either "debug_flag=True" or "debug_flag=False"
+debug_flag=False
+
+#----------------------------------------------------------------------------
+# Variables that hold intermediate parsing results and a couple of
+# helper functions.
+exprStack = [] # Holds operators and operands parsed from input.
+targetvar = None # Holds variable name to left of '=' sign in LA equation.
+
+
+def _pushFirst( str, loc, toks ):
+ if debug_flag: print("pushing ", toks[0], "str is ", str)
+ exprStack.append( toks[0] )
+
+def _assignVar( str, loc, toks ):
+ global targetvar
+ targetvar = toks[0]
+
+#-----------------------------------------------------------------------------
+# The following statements define the grammar for the parser.
+
+point = Literal('.')
+e = CaselessLiteral('E')
+plusorminus = Literal('+') | Literal('-')
+number = Word(nums)
+integer = Combine( Optional(plusorminus) + number )
+floatnumber = Combine( integer +
+ Optional( point + Optional(number) ) +
+ Optional( e + integer )
+ )
+
+lbracket = Literal("[")
+rbracket = Literal("]")
+ident = Forward()
+## The definition below treats array accesses as identifiers. This means your expressions
+## can include references to array elements, rows and columns, e.g., a = b[i] + 5.
+## Expressions within []'s are not presently supported, so a = b[i+1] will raise
+## a ParseException.
+ident = Combine(Word(alphas + '-',alphanums + '_') + \
+ ZeroOrMore(lbracket + (Word(alphas + '-',alphanums + '_')|integer) + rbracket) \
+ )
+
+plus = Literal( "+" )
+minus = Literal( "-" )
+mult = Literal( "*" )
+div = Literal( "/" )
+outer = Literal( "@" )
+lpar = Literal( "(" ).suppress()
+rpar = Literal( ")" ).suppress()
+addop = plus | minus
+multop = mult | div | outer
+expop = Literal( "^" )
+assignop = Literal( "=" )
+
+expr = Forward()
+atom = ( ( e | floatnumber | integer | ident ).setParseAction(_pushFirst) |
+ ( lpar + expr.suppress() + rpar )
+ )
+factor = Forward()
+factor << atom + ZeroOrMore( ( expop + factor ).setParseAction( _pushFirst ) )
+
+term = factor + ZeroOrMore( ( multop + factor ).setParseAction( _pushFirst ) )
+expr << term + ZeroOrMore( ( addop + term ).setParseAction( _pushFirst ) )
+equation = (ident + assignop).setParseAction(_assignVar) + expr + StringEnd()
+
+# End of grammar definition
+#-----------------------------------------------------------------------------
+## The following are helper variables and functions used by the Binary Infix Operator
+## Functions described below.
+
+vprefix = 'V3_'
+vplen = len(vprefix)
+mprefix = 'M3_'
+mplen = len(mprefix)
+
+## We don't support unary negation for vectors and matrices
+class UnaryUnsupportedError(Exception): pass
+
+def _isvec(ident):
+ if ident[0] == '-' and ident[1:vplen+1] == vprefix:
+ raise UnaryUnsupportedError
+ else: return ident[0:vplen] == vprefix
+
+def _ismat(ident):
+ if ident[0] == '-' and ident[1:mplen+1] == mprefix:
+ raise UnaryUnsupportedError
+ else: return ident[0:mplen] == mprefix
+
+def _isscalar(ident): return not (_isvec(ident) or _ismat(ident))
+
+## Binary infix operator (BIO) functions. These are called when the stack evaluator
+## pops a binary operator like '+' or '*'. The stack evaluator pops the two operands, a and b,
+## and calls the function that is mapped to the operator with a and b as arguments. Thus,
+## 'x + y' yields a call to addfunc(x,y). Each of the BIO functions checks the prefixes of its
+## arguments to determine whether the operand is scalar, vector, or matrix. This information
+## is used to generate appropriate C code. For scalars, this is essentially the input string, e.g.
+## 'a + b*5' as input yields 'a + b*5' as output. For vectors and matrices, the input is translated to
+## nested function calls, e.g. "V3_a + V3_b*5" yields "V3_vAdd(a,vScale(b,5))". Note that prefixes are
+## stripped from operands and function names within the argument list to the outer function and
+## the appropriate prefix is placed on the outer function for removal later as the stack evaluation
+## recurses toward the final assignment statement.
+
+def _addfunc(a,b):
+ if _isscalar(a) and _isscalar(b): return "(%s+%s)"%(a,b)
+ if _isvec(a) and _isvec(b): return "%svAdd(%s,%s)"%(vprefix,a[vplen:],b[vplen:])
+ if _ismat(a) and _ismat(b): return "%smAdd(%s,%s)"%(mprefix,a[mplen:],b[mplen:])
+ else: raise TypeError
+
+def _subfunc(a,b):
+ if _isscalar(a) and _isscalar(b): return "(%s-%s)"%(a,b)
+ if _isvec(a) and _isvec(b): return "%svSubtract(%s,%s)"%(vprefix,a[vplen:],b[vplen:])
+ if _ismat(a) and _ismat(b): return "%smSubtract(%s,%s)"%(mprefix,a[mplen:],b[mplen:])
+ else: raise TypeError
+
+def _mulfunc(a,b):
+ if _isscalar(a) and _isscalar(b): return "%s*%s"%(a,b)
+ if _isvec(a) and _isvec(b): return "vDot(%s,%s)"%(a[vplen:],b[vplen:])
+ if _ismat(a) and _ismat(b): return "%smMultiply(%s,%s)"%(mprefix,a[mplen:],b[mplen:])
+ if _ismat(a) and _isvec(b): return "%smvMultiply(%s,%s)"%(vprefix,a[mplen:],b[vplen:])
+ if _ismat(a) and _isscalar(b): return "%smScale(%s,%s)"%(mprefix,a[mplen:],b)
+ if _isvec(a) and _isscalar(b): return "%svScale(%s,%s)"%(vprefix,a[vplen:],b)
+ else: raise TypeError
+
+def _outermulfunc(a,b):
+ ## The '@' operator is used for the vector outer product.
+ if _isvec(a) and _isvec(b):
+ return "%svOuterProduct(%s,%s)"%(mprefix,a[vplen:],b[vplen:])
+ else: raise TypeError
+
+def _divfunc(a,b):
+ ## The '/' operator is used only for scalar division
+ if _isscalar(a) and _isscalar(b): return "%s/%s"%(a,b)
+ else: raise TypeError
+
+def _expfunc(a,b):
+ ## The '^' operator is used for exponentiation on scalars and
+ ## as a marker for unary operations on vectors and matrices.
+ if _isscalar(a) and _isscalar(b): return "pow(%s,%s)"%(str(a),str(b))
+ if _ismat(a) and b=='-1': return "%smInverse(%s)"%(mprefix,a[mplen:])
+ if _ismat(a) and b=='T': return "%smTranspose(%s)"%(mprefix,a[mplen:])
+ if _ismat(a) and b=='Det': return "mDeterminant(%s)"%(a[mplen:])
+ if _isvec(a) and b=='Mag': return "sqrt(vMagnitude2(%s))"%(a[vplen:])
+ if _isvec(a) and b=='Mag2': return "vMagnitude2(%s)"%(a[vplen:])
+ else: raise TypeError
+
+def _assignfunc(a,b):
+ ## The '=' operator is used for assignment
+ if _isscalar(a) and _isscalar(b): return "%s=%s"%(a,b)
+ if _isvec(a) and _isvec(b): return "vCopy(%s,%s)"%(a[vplen:],b[vplen:])
+ if _ismat(a) and _ismat(b): return "mCopy(%s,%s)"%(a[mplen:],b[mplen:])
+ else: raise TypeError
+
+## End of BIO func definitions
+##----------------------------------------------------------------------------
+
+# Map operator symbols to corresponding BIO funcs
+opn = { "+" : ( _addfunc ),
+ "-" : ( _subfunc ),
+ "*" : ( _mulfunc ),
+ "@" : ( _outermulfunc ),
+ "/" : ( _divfunc),
+ "^" : ( _expfunc ), }
+
+
+##----------------------------------------------------------------------------
+# Recursive function that evaluates the expression stack
+def _evaluateStack( s ):
+ op = s.pop()
+ if op in "+-*/@^":
+ op2 = _evaluateStack( s )
+ op1 = _evaluateStack( s )
+ result = opn[op]( op1, op2 )
+ if debug_flag: print(result)
+ return result
+ else:
+ return op
+
+##----------------------------------------------------------------------------
+# The parse function that invokes all of the above.
+def parse(input_string):
+ """
+ Accepts an input string containing an LA equation, e.g.,
+ "M3_mymatrix = M3_anothermatrix^-1" returns C code function
+ calls that implement the expression.
+ """
+
+ global exprStack
+ global targetvar
+
+ # Start with a blank exprStack and a blank targetvar
+ exprStack = []
+ targetvar=None
+
+ if input_string != '':
+ # try parsing the input string
+ try:
+ L=equation.parseString( input_string )
+ except ParseException as err:
+ print('Parse Failure', file=sys.stderr)
+ print(err.line, file=sys.stderr)
+ print(" "*(err.column-1) + "^", file=sys.stderr)
+ print(err, file=sys.stderr)
+ raise
+
+ # show result of parsing the input string
+ if debug_flag:
+ print(input_string, "->", L)
+ print("exprStack=", exprStack)
+
+ # Evaluate the stack of parsed operands, emitting C code.
+ try:
+ result=_evaluateStack(exprStack)
+ except TypeError:
+ print("Unsupported operation on right side of '%s'.\nCheck for missing or incorrect tags on non-scalar operands."%input_string, file=sys.stderr)
+ raise
+ except UnaryUnsupportedError:
+ print("Unary negation is not supported for vectors and matrices: '%s'"%input_string, file=sys.stderr)
+ raise
+
+ # Create final assignment and print it.
+ if debug_flag: print("var=",targetvar)
+ if targetvar != None:
+ try:
+ result = _assignfunc(targetvar,result)
+ except TypeError:
+ print("Left side tag does not match right side of '%s'"%input_string, file=sys.stderr)
+ raise
+ except UnaryUnsupportedError:
+ print("Unary negation is not supported for vectors and matrices: '%s'"%input_string, file=sys.stderr)
+ raise
+
+ return result
+ else:
+ print("Empty left side in '%s'"%input_string, file=sys.stderr)
+ raise TypeError
+
+##-----------------------------------------------------------------------------------
+def fprocess(infilep,outfilep):
+ """
+ Scans an input file for LA equations between double square brackets,
+ e.g. [[ M3_mymatrix = M3_anothermatrix^-1 ]], and replaces the expression
+ with a comment containing the equation followed by nested function calls
+ that implement the equation as C code. A trailing semi-colon is appended.
+ The equation within [[ ]] should NOT end with a semicolon as that will raise
+ a ParseException. However, it is ok to have a semicolon after the right brackets.
+
+ Other text in the file is unaltered.
+
+ The arguments are file objects (NOT file names) opened for reading and
+ writing, respectively.
+ """
+ pattern = r'\[\[\s*(.*?)\s*\]\]'
+ eqn = re.compile(pattern,re.DOTALL)
+ s = infilep.read()
+ def parser(mo):
+ ccode = parse(mo.group(1))
+ return "/* %s */\n%s;\nLAParserBufferReset();\n"%(mo.group(1),ccode)
+
+ content = eqn.sub(parser,s)
+ outfilep.write(content)
+
+##-----------------------------------------------------------------------------------
+def test():
+ """
+ Tests the parsing of various supported expressions. Raises
+ an AssertionError if the output is not what is expected. Prints the
+ input, expected output, and actual output for all tests.
+ """
+ print("Testing LAParser")
+ testcases = [
+ ("Scalar addition","a = b+c","a=(b+c)"),
+ ("Vector addition","V3_a = V3_b + V3_c","vCopy(a,vAdd(b,c))"),
+ ("Vector addition","V3_a=V3_b+V3_c","vCopy(a,vAdd(b,c))"),
+ ("Matrix addition","M3_a = M3_b + M3_c","mCopy(a,mAdd(b,c))"),
+ ("Matrix addition","M3_a=M3_b+M3_c","mCopy(a,mAdd(b,c))"),
+ ("Scalar subtraction","a = b-c","a=(b-c)"),
+ ("Vector subtraction","V3_a = V3_b - V3_c","vCopy(a,vSubtract(b,c))"),
+ ("Matrix subtraction","M3_a = M3_b - M3_c","mCopy(a,mSubtract(b,c))"),
+ ("Scalar multiplication","a = b*c","a=b*c"),
+ ("Scalar division","a = b/c","a=b/c"),
+ ("Vector multiplication (dot product)","a = V3_b * V3_c","a=vDot(b,c)"),
+ ("Vector multiplication (outer product)","M3_a = V3_b @ V3_c","mCopy(a,vOuterProduct(b,c))"),
+ ("Matrix multiplication","M3_a = M3_b * M3_c","mCopy(a,mMultiply(b,c))"),
+ ("Vector scaling","V3_a = V3_b * c","vCopy(a,vScale(b,c))"),
+ ("Matrix scaling","M3_a = M3_b * c","mCopy(a,mScale(b,c))"),
+ ("Matrix by vector multiplication","V3_a = M3_b * V3_c","vCopy(a,mvMultiply(b,c))"),
+ ("Scalar exponentiation","a = b^c","a=pow(b,c)"),
+ ("Matrix inversion","M3_a = M3_b^-1","mCopy(a,mInverse(b))"),
+ ("Matrix transpose","M3_a = M3_b^T","mCopy(a,mTranspose(b))"),
+ ("Matrix determinant","a = M3_b^Det","a=mDeterminant(b)"),
+ ("Vector magnitude squared","a = V3_b^Mag2","a=vMagnitude2(b)"),
+ ("Vector magnitude","a = V3_b^Mag","a=sqrt(vMagnitude2(b))"),
+ ("Complicated expression", "myscalar = (M3_amatrix * V3_bvector)^Mag + 5*(-xyz[i] + 2.03^2)","myscalar=(sqrt(vMagnitude2(mvMultiply(amatrix,bvector)))+5*(-xyz[i]+pow(2.03,2)))"),
+ ("Complicated Multiline", "myscalar = \n(M3_amatrix * V3_bvector)^Mag +\n 5*(xyz + 2.03^2)","myscalar=(sqrt(vMagnitude2(mvMultiply(amatrix,bvector)))+5*(xyz+pow(2.03,2)))")
+
+ ]
+
+
+ all_passed = [True]
+
+ def post_test(test, parsed):
+
+ # copy exprStack to evaluate and clear before running next test
+ parsed_stack = exprStack[:]
+ exprStack.clear()
+
+ name, testcase, expected = next(tc for tc in testcases if tc[1] == test)
+
+ this_test_passed = False
+ try:
+ try:
+ result=_evaluateStack(parsed_stack)
+ except TypeError:
+ print("Unsupported operation on right side of '%s'.\nCheck for missing or incorrect tags on non-scalar operands."%input_string, file=sys.stderr)
+ raise
+ except UnaryUnsupportedError:
+ print("Unary negation is not supported for vectors and matrices: '%s'"%input_string, file=sys.stderr)
+ raise
+
+ # Create final assignment and print it.
+ if debug_flag: print("var=",targetvar)
+ if targetvar != None:
+ try:
+ result = _assignfunc(targetvar,result)
+ except TypeError:
+ print("Left side tag does not match right side of '%s'"%input_string, file=sys.stderr)
+ raise
+ except UnaryUnsupportedError:
+ print("Unary negation is not supported for vectors and matrices: '%s'"%input_string, file=sys.stderr)
+ raise
+
+ else:
+ print("Empty left side in '%s'"%input_string, file=sys.stderr)
+ raise TypeError
+
+ parsed['result'] = result
+ parsed['passed'] = this_test_passed = result == expected
+
+ finally:
+ all_passed[0] = all_passed[0] and this_test_passed
+ print('\n' + name)
+
+ equation.runTests((t[1] for t in testcases), postParse=post_test)
+
+
+ ##TODO: Write testcases with invalid expressions and test that the expected
+ ## exceptions are raised.
+
+ print("Tests completed!")
+ print("PASSED" if all_passed[0] else "FAILED")
+ assert all_passed[0]
+
+##----------------------------------------------------------------------------
+## The following is executed only when this module is executed as
+## command line script. It runs a small test suite (see above)
+## and then enters an interactive loop where you
+## can enter expressions and see the resulting C code as output.
+
+if __name__ == '__main__':
+
+ import sys
+ if not sys.flags.interactive:
+ # run testcases
+ test()
+ sys.exit(0)
+
+ # input_string
+ input_string=''
+
+ # Display instructions on how to use the program interactively
+ interactiveusage = """
+ Entering interactive mode:
+ Type in an equation to be parsed or 'quit' to exit the program.
+ Type 'debug on' to print parsing details as each string is processed.
+ Type 'debug off' to stop printing parsing details
+ """
+ print(interactiveusage)
+ input_string = input("> ")
+
+ while input_string != 'quit':
+ if input_string == "debug on":
+ debug_flag = True
+ elif input_string == "debug off":
+ debug_flag = False
+ else:
+ try:
+ print(parse(input_string))
+ except Exception:
+ pass
+
+ # obtain new input string
+ input_string = input("> ")
+
+ # if user types 'quit' then say goodbye
+ print("Good bye!")
+ import os
+ os._exit(0)
+
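The module docstring above names two entry points: parse() for translating a single LA equation, and fprocess() for rewriting equations wrapped in [[ ]] inside C source. A minimal usage sketch, assuming LAparser.py is on the import path:

    import io
    import LAparser

    # One equation -> C function-call form (per the docstring table).
    print(LAparser.parse("V3_a = V3_b + V3_c"))   # vCopy(a,vAdd(b,c))

    # Rewrite [[ ]] blocks embedded in a C source buffer; fprocess()
    # takes open file objects, so StringIO works for a quick check.
    src = io.StringIO("float a[3], b[3], c[3];\n[[ V3_a = V3_b + V3_c ]]\n")
    out = io.StringIO()
    LAparser.fprocess(src, out)
    print(out.getvalue())
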
diff --git a/examples/commasep.py b/examples/commasep.py
index eae6dc1..067647d 100644
--- a/examples/commasep.py
+++ b/examples/commasep.py
@@ -1,7 +1,7 @@
# commasep.py
#
# comma-separated list example, to illustrate the advantages of using
-# the pyparsing commaSeparatedList as opposed to string.split(","):
+# the pyparsing comma_separated_list as opposed to string.split(","):
# - leading and trailing whitespace is implicitly trimmed from list elements
# - list elements can be quoted strings, which can safely contain commas without breaking
# into separate elements
@@ -9,7 +9,8 @@
# Copyright (c) 2004-2016, Paul McGuire
#
-from pyparsing import commaSeparatedList
+import pyparsing as pp
+ppc = pp.pyparsing_common
testData = [
"a,b,c,100.2,,3",
@@ -20,4 +21,4 @@ testData = [
"",
]
-commaSeparatedList.runTests(testData)
+ppc.comma_separated_list.runTests(testData)
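The header comment in commasep.py lists the two advantages of comma_separated_list over a plain split: trimmed whitespace and quoted elements that may themselves contain commas. A small sketch of that difference, using the same pyparsing_common alias as the example:

    import pyparsing as pp
    ppc = pp.pyparsing_common

    line = 'a, "hello, world" , 100.2'
    # str.split breaks the quoted string apart and keeps stray whitespace.
    print(line.split(","))
    # comma_separated_list trims whitespace and keeps the quoted string whole.
    print(ppc.comma_separated_list.parseString(line).asList())
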
diff --git a/examples/eval_arith.py b/examples/eval_arith.py
index 133f6c2..0896c01 100644
--- a/examples/eval_arith.py
+++ b/examples/eval_arith.py
@@ -1,227 +1,229 @@
-# eval_arith.py
-#
-# Copyright 2009, 2011 Paul McGuire
-#
-# Expansion on the pyparsing example simpleArith.py, to include evaluation
-# of the parsed tokens.
-#
-# Added support for exponentiation, using right-to-left evaluation of
-# operands
-#
-from pyparsing import Word, nums, alphas, Combine, oneOf, \
- opAssoc, infixNotation, Literal
-
-class EvalConstant(object):
- "Class to evaluate a parsed constant or variable"
- vars_ = {}
- def __init__(self, tokens):
- self.value = tokens[0]
- def eval(self):
- if self.value in EvalConstant.vars_:
- return EvalConstant.vars_[self.value]
- else:
- return float(self.value)
-
-class EvalSignOp(object):
- "Class to evaluate expressions with a leading + or - sign"
- def __init__(self, tokens):
- self.sign, self.value = tokens[0]
- def eval(self):
- mult = {'+':1, '-':-1}[self.sign]
- return mult * self.value.eval()
-
-def operatorOperands(tokenlist):
- "generator to extract operators and operands in pairs"
- it = iter(tokenlist)
- while 1:
- try:
- yield (next(it), next(it))
- except StopIteration:
- break
-
-class EvalPowerOp(object):
- "Class to evaluate multiplication and division expressions"
- def __init__(self, tokens):
- self.value = tokens[0]
- def eval(self):
- res = self.value[-1].eval()
- for val in self.value[-3::-2]:
- res = val.eval()**res
- return res
-
-class EvalMultOp(object):
- "Class to evaluate multiplication and division expressions"
- def __init__(self, tokens):
- self.value = tokens[0]
- def eval(self):
- prod = self.value[0].eval()
- for op,val in operatorOperands(self.value[1:]):
- if op == '*':
- prod *= val.eval()
- if op == '/':
- prod /= val.eval()
- return prod
-
-class EvalAddOp(object):
- "Class to evaluate addition and subtraction expressions"
- def __init__(self, tokens):
- self.value = tokens[0]
- def eval(self):
- sum = self.value[0].eval()
- for op,val in operatorOperands(self.value[1:]):
- if op == '+':
- sum += val.eval()
- if op == '-':
- sum -= val.eval()
- return sum
-
-class EvalComparisonOp(object):
- "Class to evaluate comparison expressions"
- opMap = {
- "<" : lambda a,b : a < b,
- "<=" : lambda a,b : a <= b,
- ">" : lambda a,b : a > b,
- ">=" : lambda a,b : a >= b,
- "!=" : lambda a,b : a != b,
- "=" : lambda a,b : a == b,
- "LT" : lambda a,b : a < b,
- "LE" : lambda a,b : a <= b,
- "GT" : lambda a,b : a > b,
- "GE" : lambda a,b : a >= b,
- "NE" : lambda a,b : a != b,
- "EQ" : lambda a,b : a == b,
- "<>" : lambda a,b : a != b,
- }
- def __init__(self, tokens):
- self.value = tokens[0]
- def eval(self):
- val1 = self.value[0].eval()
- for op,val in operatorOperands(self.value[1:]):
- fn = EvalComparisonOp.opMap[op]
- val2 = val.eval()
- if not fn(val1,val2):
- break
- val1 = val2
- else:
- return True
- return False
-
-
-# define the parser
-integer = Word(nums)
-real = Combine(Word(nums) + "." + Word(nums))
-variable = Word(alphas,exact=1)
-operand = real | integer | variable
-
-signop = oneOf('+ -')
-multop = oneOf('* /')
-plusop = oneOf('+ -')
-expop = Literal('**')
-
-# use parse actions to attach EvalXXX constructors to sub-expressions
-operand.setParseAction(EvalConstant)
-arith_expr = infixNotation(operand,
- [
- (signop, 1, opAssoc.RIGHT, EvalSignOp),
- (expop, 2, opAssoc.LEFT, EvalPowerOp),
- (multop, 2, opAssoc.LEFT, EvalMultOp),
- (plusop, 2, opAssoc.LEFT, EvalAddOp),
- ])
-
-comparisonop = oneOf("< <= > >= != = <> LT GT LE GE EQ NE")
-comp_expr = infixNotation(arith_expr,
- [
- (comparisonop, 2, opAssoc.LEFT, EvalComparisonOp),
- ])
-
-def main():
- # sample expressions posted on comp.lang.python, asking for advice
- # in safely evaluating them
- rules=[
- '( A - B ) = 0',
- '(A + B + C + D + E + F + G + H + I) = J',
- '(A + B + C + D + E + F + G + H) = I',
- '(A + B + C + D + E + F) = G',
- '(A + B + C + D + E) = (F + G + H + I + J)',
- '(A + B + C + D + E) = (F + G + H + I)',
- '(A + B + C + D + E) = F',
- '(A + B + C + D) = (E + F + G + H)',
- '(A + B + C) = (D + E + F)',
- '(A + B) = (C + D + E + F)',
- '(A + B) = (C + D)',
- '(A + B) = (C - D + E - F - G + H + I + J)',
- '(A + B) = C',
- '(A + B) = 0',
- '(A+B+C+D+E) = (F+G+H+I+J)',
- '(A+B+C+D) = (E+F+G+H)',
- '(A+B+C+D)=(E+F+G+H)',
- '(A+B+C)=(D+E+F)',
- '(A+B)=(C+D)',
- '(A+B)=C',
- '(A-B)=C',
- '(A/(B+C))',
- '(B/(C+D))',
- '(G + H) = I',
- '-0.99 LE ((A+B+C)-(D+E+F+G)) LE 0.99',
- '-0.99 LE (A-(B+C)) LE 0.99',
- '-1000.00 LE A LE 0.00',
- '-5000.00 LE A LE 0.00',
- 'A < B',
- 'A < 7000',
- 'A = -(B)',
- 'A = C',
- 'A = 0',
- 'A GT 0',
- 'A GT 0.00',
- 'A GT 7.00',
- 'A LE B',
- 'A LT -1000.00',
- 'A LT -5000',
- 'A LT 0',
- 'A=(B+C+D)',
- 'A=B',
- 'I = (G + H)',
- '0.00 LE A LE 4.00',
- '4.00 LT A LE 7.00',
- '0.00 LE A LE 4.00 LE E > D',
- '2**2**(A+3)',
- ]
- vars_={'A': 0, 'B': 1.1, 'C': 2.2, 'D': 3.3, 'E': 4.4, 'F': 5.5, 'G':
- 6.6, 'H':7.7, 'I':8.8, 'J':9.9}
-
- # define tests from given rules
- tests = []
- for t in rules:
- t_orig = t
- t = t.replace("=","==")
- t = t.replace("EQ","==")
- t = t.replace("LE","<=")
- t = t.replace("GT",">")
- t = t.replace("LT","<")
- t = t.replace("GE",">=")
- t = t.replace("LE","<=")
- t = t.replace("NE","!=")
- t = t.replace("<>","!=")
- tests.append( (t_orig,eval(t,vars_)) )
-
- # copy vars_ to EvalConstant lookup dict
- EvalConstant.vars_ = vars_
- failed = 0
- for test,expected in tests:
- ret = comp_expr.parseString(test)[0]
- parsedvalue = ret.eval()
- print(test, expected, parsedvalue)
- if parsedvalue != expected:
- print("<<< FAIL")
- failed += 1
- else:
- print('')
-
- print('')
- if failed:
- print(failed, "tests FAILED")
- else:
- print("all tests PASSED")
-
-if __name__=='__main__':
- main()
+# eval_arith.py
+#
+# Copyright 2009, 2011 Paul McGuire
+#
+# Expansion on the pyparsing example simpleArith.py, to include evaluation
+# of the parsed tokens.
+#
+# Added support for exponentiation, using right-to-left evaluation of
+# operands
+#
+from pyparsing import Word, nums, alphas, Combine, oneOf, \
+ opAssoc, infixNotation, Literal
+
+class EvalConstant(object):
+ "Class to evaluate a parsed constant or variable"
+ vars_ = {}
+ def __init__(self, tokens):
+ self.value = tokens[0]
+ def eval(self):
+ if self.value in EvalConstant.vars_:
+ return EvalConstant.vars_[self.value]
+ else:
+ return float(self.value)
+
+class EvalSignOp(object):
+ "Class to evaluate expressions with a leading + or - sign"
+ def __init__(self, tokens):
+ self.sign, self.value = tokens[0]
+ def eval(self):
+ mult = {'+':1, '-':-1}[self.sign]
+ return mult * self.value.eval()
+
+def operatorOperands(tokenlist):
+ "generator to extract operators and operands in pairs"
+ it = iter(tokenlist)
+ while 1:
+ try:
+ yield (next(it), next(it))
+ except StopIteration:
+ break
+
+class EvalPowerOp(object):
+ "Class to evaluate multiplication and division expressions"
+ def __init__(self, tokens):
+ self.value = tokens[0]
+ def eval(self):
+ res = self.value[-1].eval()
+ for val in self.value[-3::-2]:
+ res = val.eval()**res
+ return res
+
+class EvalMultOp(object):
+ "Class to evaluate multiplication and division expressions"
+ def __init__(self, tokens):
+ self.value = tokens[0]
+ def eval(self):
+ prod = self.value[0].eval()
+ for op,val in operatorOperands(self.value[1:]):
+ if op == '*':
+ prod *= val.eval()
+ if op == '/':
+ prod /= val.eval()
+ return prod
+
+class EvalAddOp(object):
+ "Class to evaluate addition and subtraction expressions"
+ def __init__(self, tokens):
+ self.value = tokens[0]
+ def eval(self):
+ sum = self.value[0].eval()
+ for op,val in operatorOperands(self.value[1:]):
+ if op == '+':
+ sum += val.eval()
+ if op == '-':
+ sum -= val.eval()
+ return sum
+
+class EvalComparisonOp(object):
+ "Class to evaluate comparison expressions"
+ opMap = {
+ "<" : lambda a,b : a < b,
+ "<=" : lambda a,b : a <= b,
+ ">" : lambda a,b : a > b,
+ ">=" : lambda a,b : a >= b,
+ "!=" : lambda a,b : a != b,
+ "=" : lambda a,b : a == b,
+ "LT" : lambda a,b : a < b,
+ "LE" : lambda a,b : a <= b,
+ "GT" : lambda a,b : a > b,
+ "GE" : lambda a,b : a >= b,
+ "NE" : lambda a,b : a != b,
+ "EQ" : lambda a,b : a == b,
+ "<>" : lambda a,b : a != b,
+ }
+ def __init__(self, tokens):
+ self.value = tokens[0]
+ def eval(self):
+ val1 = self.value[0].eval()
+ for op,val in operatorOperands(self.value[1:]):
+ fn = EvalComparisonOp.opMap[op]
+ val2 = val.eval()
+ if not fn(val1,val2):
+ break
+ val1 = val2
+ else:
+ return True
+ return False
+
+
+# define the parser
+integer = Word(nums)
+real = Combine(Word(nums) + "." + Word(nums))
+variable = Word(alphas,exact=1)
+operand = real | integer | variable
+
+signop = oneOf('+ -')
+multop = oneOf('* /')
+plusop = oneOf('+ -')
+expop = Literal('**')
+
+# use parse actions to attach EvalXXX constructors to sub-expressions
+operand.setParseAction(EvalConstant)
+arith_expr = infixNotation(operand,
+ [
+ (signop, 1, opAssoc.RIGHT, EvalSignOp),
+ (expop, 2, opAssoc.LEFT, EvalPowerOp),
+ (multop, 2, opAssoc.LEFT, EvalMultOp),
+ (plusop, 2, opAssoc.LEFT, EvalAddOp),
+ ])
+
+comparisonop = oneOf("< <= > >= != = <> LT GT LE GE EQ NE")
+comp_expr = infixNotation(arith_expr,
+ [
+ (comparisonop, 2, opAssoc.LEFT, EvalComparisonOp),
+ ])
+
+def main():
+ # sample expressions posted on comp.lang.python, asking for advice
+ # in safely evaluating them
+ rules=[
+ '( A - B ) = 0',
+ '(A + B + C + D + E + F + G + H + I) = J',
+ '(A + B + C + D + E + F + G + H) = I',
+ '(A + B + C + D + E + F) = G',
+ '(A + B + C + D + E) = (F + G + H + I + J)',
+ '(A + B + C + D + E) = (F + G + H + I)',
+ '(A + B + C + D + E) = F',
+ '(A + B + C + D) = (E + F + G + H)',
+ '(A + B + C) = (D + E + F)',
+ '(A + B) = (C + D + E + F)',
+ '(A + B) = (C + D)',
+ '(A + B) = (C - D + E - F - G + H + I + J)',
+ '(A + B) = C',
+ '(A + B) = 0',
+ '(A+B+C+D+E) = (F+G+H+I+J)',
+ '(A+B+C+D) = (E+F+G+H)',
+ '(A+B+C+D)=(E+F+G+H)',
+ '(A+B+C)=(D+E+F)',
+ '(A+B)=(C+D)',
+ '(A+B)=C',
+ '(A-B)=C',
+ '(A/(B+C))',
+ '(B/(C+D))',
+ '(G + H) = I',
+ '-0.99 LE ((A+B+C)-(D+E+F+G)) LE 0.99',
+ '-0.99 LE (A-(B+C)) LE 0.99',
+ '-1000.00 LE A LE 0.00',
+ '-5000.00 LE A LE 0.00',
+ 'A < B',
+ 'A < 7000',
+ 'A = -(B)',
+ 'A = C',
+ 'A = 0',
+ 'A GT 0',
+ 'A GT 0.00',
+ 'A GT 7.00',
+ 'A LE B',
+ 'A LT -1000.00',
+ 'A LT -5000',
+ 'A LT 0',
+ 'A=(B+C+D)',
+ 'A=B',
+ 'I = (G + H)',
+ '0.00 LE A LE 4.00',
+ '4.00 LT A LE 7.00',
+ '0.00 LE A LE 4.00 LE E > D',
+ '2**2**(A+3)',
+ ]
+ vars_={'A': 0, 'B': 1.1, 'C': 2.2, 'D': 3.3, 'E': 4.4, 'F': 5.5, 'G':
+ 6.6, 'H':7.7, 'I':8.8, 'J':9.9}
+
+ # define tests from given rules
+ tests = []
+ for t in rules:
+ t_orig = t
+ t = t.replace("=","==")
+ t = t.replace("EQ","==")
+ t = t.replace("LE","<=")
+ t = t.replace("GT",">")
+ t = t.replace("LT","<")
+ t = t.replace("GE",">=")
+ t = t.replace("LE","<=")
+ t = t.replace("NE","!=")
+ t = t.replace("<>","!=")
+ tests.append( (t_orig,eval(t,vars_)) )
+
+ # copy vars_ to EvalConstant lookup dict
+ EvalConstant.vars_ = vars_
+ failed = 0
+ for test,expected in tests:
+ ret = comp_expr.parseString(test)[0]
+ parsedvalue = ret.eval()
+ print(test, expected, parsedvalue)
+ if parsedvalue != expected:
+ print("<<< FAIL")
+ failed += 1
+ else:
+ print('')
+
+ print('')
+ if failed:
+ print(failed, "tests FAILED")
+ return 1
+ else:
+ print("all tests PASSED")
+ return 0
+
+if __name__=='__main__':
+ exit(main())
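Besides the self-test in main(), the grammar objects defined in eval_arith.py can be used directly. A short sketch, assuming the module is importable, showing the evaluator and the right-to-left exponentiation mentioned in the header comment:

    from eval_arith import comp_expr, EvalConstant

    # Variable values are resolved through the EvalConstant.vars_ lookup dict.
    EvalConstant.vars_ = {"A": 2, "B": 3}

    print(comp_expr.parseString("A + B * 2")[0].eval())    # 8.0
    print(comp_expr.parseString("2**2**(A+1)")[0].eval())  # 256.0, i.e. 2**(2**3)
    print(comp_expr.parseString("A < B")[0].eval())        # True
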
diff --git a/examples/httpServerLogParser.py b/examples/httpServerLogParser.py
index 261cea3..b10678b 100644
--- a/examples/httpServerLogParser.py
+++ b/examples/httpServerLogParser.py
@@ -1,73 +1,72 @@
-# httpServerLogParser.py
-#
-# Copyright (c) 2016, Paul McGuire
-#
-"""
-Parser for HTTP server log output, of the form:
-
-195.146.134.15 - - [20/Jan/2003:08:55:36 -0800]
-"GET /path/to/page.html HTTP/1.0" 200 4649 "http://www.somedomain.com/020602/page.html"
-"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)"
-127.0.0.1 - u.surname@domain.com [12/Sep/2006:14:13:53 +0300]
-"GET /skins/monobook/external.png HTTP/1.0" 304 - "http://wiki.mysite.com/skins/monobook/main.css"
-"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.0.6) Gecko/20060728 Firefox/1.5.0.6"
-
-You can then break it up as follows:
-IP ADDRESS - -
-Server Date / Time [SPACE]
-"GET /path/to/page
-HTTP/Type Request"
-Success Code
-Bytes Sent To Client
-Referer
-Client Software
-"""
-
-from pyparsing import alphas,nums, dblQuotedString, Combine, Word, Group, delimitedList, Suppress, removeQuotes
-import string
-
-def getCmdFields( s, l, t ):
- t["method"],t["requestURI"],t["protocolVersion"] = t[0].strip('"').split()
-
-logLineBNF = None
-def getLogLineBNF():
- global logLineBNF
-
- if logLineBNF is None:
- integer = Word( nums )
- ipAddress = delimitedList( integer, ".", combine=True )
-
- timeZoneOffset = Word("+-",nums)
- month = Word(string.uppercase, string.lowercase, exact=3)
- serverDateTime = Group( Suppress("[") +
- Combine( integer + "/" + month + "/" + integer +
- ":" + integer + ":" + integer + ":" + integer ) +
- timeZoneOffset +
- Suppress("]") )
-
- logLineBNF = ( ipAddress.setResultsName("ipAddr") +
- Suppress("-") +
- ("-" | Word( alphas+nums+"@._" )).setResultsName("auth") +
- serverDateTime.setResultsName("timestamp") +
- dblQuotedString.setResultsName("cmd").setParseAction(getCmdFields) +
- (integer | "-").setResultsName("statusCode") +
- (integer | "-").setResultsName("numBytesSent") +
- dblQuotedString.setResultsName("referrer").setParseAction(removeQuotes) +
- dblQuotedString.setResultsName("clientSfw").setParseAction(removeQuotes) )
- return logLineBNF
-
-testdata = """
-195.146.134.15 - - [20/Jan/2003:08:55:36 -0800] "GET /path/to/page.html HTTP/1.0" 200 4649 "http://www.somedomain.com/020602/page.html" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)"
-111.111.111.11 - - [16/Feb/2004:04:09:49 -0800] "GET /ads/redirectads/336x280redirect.htm HTTP/1.1" 304 - "http://www.foobarp.org/theme_detail.php?type=vs&cat=0&mid=27512" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)"
-11.111.11.111 - - [16/Feb/2004:10:35:12 -0800] "GET /ads/redirectads/468x60redirect.htm HTTP/1.1" 200 541 "http://11.11.111.11/adframe.php?n=ad1f311a&what=zone:56" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1) Opera 7.20 [ru\"]"
-127.0.0.1 - u.surname@domain.com [12/Sep/2006:14:13:53 +0300] "GET /skins/monobook/external.png HTTP/1.0" 304 - "http://wiki.mysite.com/skins/monobook/main.css" "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.0.6) Gecko/20060728 Firefox/1.5.0.6"
-"""
-for line in testdata.split("\n"):
- if not line: continue
- fields = getLogLineBNF().parseString(line)
- print(fields.dump())
- #~ print repr(fields)
- #~ for k in fields.keys():
- #~ print "fields." + k + " =", fields[k]
- print(fields.asXML("LOG"))
- print()
+# httpServerLogParser.py
+#
+# Copyright (c) 2016, Paul McGuire
+#
+"""
+Parser for HTTP server log output, of the form:
+
+195.146.134.15 - - [20/Jan/2003:08:55:36 -0800]
+"GET /path/to/page.html HTTP/1.0" 200 4649 "http://www.somedomain.com/020602/page.html"
+"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)"
+127.0.0.1 - u.surname@domain.com [12/Sep/2006:14:13:53 +0300]
+"GET /skins/monobook/external.png HTTP/1.0" 304 - "http://wiki.mysite.com/skins/monobook/main.css"
+"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.0.6) Gecko/20060728 Firefox/1.5.0.6"
+
+You can then break it up as follows:
+IP ADDRESS - -
+Server Date / Time [SPACE]
+"GET /path/to/page
+HTTP/Type Request"
+Success Code
+Bytes Sent To Client
+Referer
+Client Software
+"""
+
+from pyparsing import alphas,nums, dblQuotedString, Combine, Word, Group, delimitedList, Suppress, removeQuotes
+import string
+
+def getCmdFields( s, l, t ):
+ t["method"],t["requestURI"],t["protocolVersion"] = t[0].strip('"').split()
+
+logLineBNF = None
+def getLogLineBNF():
+ global logLineBNF
+
+ if logLineBNF is None:
+ integer = Word( nums )
+ ipAddress = delimitedList( integer, ".", combine=True )
+
+ timeZoneOffset = Word("+-",nums)
+ month = Word(string.ascii_uppercase, string.ascii_lowercase, exact=3)
+ serverDateTime = Group( Suppress("[") +
+ Combine( integer + "/" + month + "/" + integer +
+ ":" + integer + ":" + integer + ":" + integer ) +
+ timeZoneOffset +
+ Suppress("]") )
+
+ logLineBNF = ( ipAddress.setResultsName("ipAddr") +
+ Suppress("-") +
+ ("-" | Word( alphas+nums+"@._" )).setResultsName("auth") +
+ serverDateTime.setResultsName("timestamp") +
+ dblQuotedString.setResultsName("cmd").setParseAction(getCmdFields) +
+ (integer | "-").setResultsName("statusCode") +
+ (integer | "-").setResultsName("numBytesSent") +
+ dblQuotedString.setResultsName("referrer").setParseAction(removeQuotes) +
+ dblQuotedString.setResultsName("clientSfw").setParseAction(removeQuotes) )
+ return logLineBNF
+
+testdata = """
+195.146.134.15 - - [20/Jan/2003:08:55:36 -0800] "GET /path/to/page.html HTTP/1.0" 200 4649 "http://www.somedomain.com/020602/page.html" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)"
+111.111.111.11 - - [16/Feb/2004:04:09:49 -0800] "GET /ads/redirectads/336x280redirect.htm HTTP/1.1" 304 - "http://www.foobarp.org/theme_detail.php?type=vs&cat=0&mid=27512" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)"
+11.111.11.111 - - [16/Feb/2004:10:35:12 -0800] "GET /ads/redirectads/468x60redirect.htm HTTP/1.1" 200 541 "http://11.11.111.11/adframe.php?n=ad1f311a&what=zone:56" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1) Opera 7.20 [ru\"]"
+127.0.0.1 - u.surname@domain.com [12/Sep/2006:14:13:53 +0300] "GET /skins/monobook/external.png HTTP/1.0" 304 - "http://wiki.mysite.com/skins/monobook/main.css" "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.0.6) Gecko/20060728 Firefox/1.5.0.6"
+"""
+for line in testdata.split("\n"):
+ if not line: continue
+ fields = getLogLineBNF().parseString(line)
+ print(fields.dump())
+ #~ print repr(fields)
+ #~ for k in fields.keys():
+ #~ print "fields." + k + " =", fields[k]
+ print()
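The results names attached in getLogLineBNF() make individual log fields addressable by name after parsing. A brief sketch (note that importing the example also runs its built-in demo loop over testdata):

    from httpServerLogParser import getLogLineBNF

    line = ('195.146.134.15 - - [20/Jan/2003:08:55:36 -0800] '
            '"GET /path/to/page.html HTTP/1.0" 200 4649 '
            '"http://www.somedomain.com/020602/page.html" '
            '"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)"')

    fields = getLogLineBNF().parseString(line)
    print(fields.ipAddr)           # 195.146.134.15
    print(fields.statusCode)       # 200
    print(fields.referrer)         # quotes already stripped by removeQuotes
    print(list(fields.timestamp))  # ['20/Jan/2003:08:55:36', '-0800']
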
diff --git a/examples/lucene_grammar.py b/examples/lucene_grammar.py
index 07eb319..bf92509 100644
--- a/examples/lucene_grammar.py
+++ b/examples/lucene_grammar.py
@@ -1,325 +1,332 @@
-#
-# lucene_grammar.py
-#
-# Copyright 2011, Paul McGuire
-#
-# implementation of Lucene grammar, as decribed
-# at http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/docs/queryparsersyntax.html
-#
-
-import pyparsing as pp
-from pyparsing import pyparsing_common as ppc
-pp.ParserElement.enablePackrat()
-
-COLON,LBRACK,RBRACK,LBRACE,RBRACE,TILDE,CARAT = map(pp.Literal,":[]{}~^")
-LPAR,RPAR = map(pp.Suppress,"()")
-and_, or_, not_, to_ = map(pp.CaselessKeyword, "AND OR NOT TO".split())
-keyword = and_ | or_ | not_ | to_
-
-expression = pp.Forward()
-
-valid_word = pp.Regex(r'([a-zA-Z0-9*_+.-]|\\\\|\\([+\-!(){}\[\]^"~*?:]|\|\||&&))+').setName("word")
-valid_word.setParseAction(
- lambda t : t[0].replace('\\\\',chr(127)).replace('\\','').replace(chr(127),'\\')
- )
-
-string = pp.QuotedString('"')
-
-required_modifier = pp.Literal("+")("required")
-prohibit_modifier = pp.Literal("-")("prohibit")
-integer = ppc.integer()
-proximity_modifier = pp.Group(TILDE + integer("proximity"))
-number = ppc.fnumber()
-fuzzy_modifier = TILDE + pp.Optional(number, default=0.5)("fuzzy")
-
-term = pp.Forward()
-field_name = valid_word().setName("fieldname")
-incl_range_search = pp.Group(LBRACK + term("lower") + to_ + term("upper") + RBRACK)
-excl_range_search = pp.Group(LBRACE + term("lower") + to_ + term("upper") + RBRACE)
-range_search = incl_range_search("incl_range") | excl_range_search("excl_range")
-boost = (CARAT + number("boost"))
-
-string_expr = pp.Group(string + proximity_modifier) | string
-word_expr = pp.Group(valid_word + fuzzy_modifier) | valid_word
-term << (pp.Optional(field_name("field") + COLON)
- + (word_expr | string_expr | range_search | pp.Group(LPAR + expression + RPAR))
- + pp.Optional(boost))
-term.setParseAction(lambda t:[t] if 'field' in t or 'boost' in t else None)
-
-expression << pp.infixNotation(term,
- [
- (required_modifier | prohibit_modifier, 1, pp.opAssoc.RIGHT),
- ((not_ | '!').setParseAction(lambda: "NOT"), 1, pp.opAssoc.RIGHT),
- ((and_ | '&&').setParseAction(lambda: "AND"), 2, pp.opAssoc.LEFT),
- (pp.Optional(or_ | '||').setParseAction(lambda: "OR"), 2, pp.opAssoc.LEFT),
- ])
-
-# test strings taken from grammar description doc, and TestQueryParser.java
-tests = r"""
- # Success tests
- a and b
- a and not b
- a and !b
- a && !b
- a&&!b
- name:a
- name:a and not title:b
- (a^100 c d f) and !z
- name:"blah de blah"
- title:(+return +"pink panther")
- title:"The Right Way" AND text:go
- title:"Do it right" AND right
- title:Do it right
- roam~
- roam~0.8
- "jakarta apache"~10
- mod_date:[20020101 TO 20030101]
- title:{Aida TO Carmen}
- jakarta apache
- jakarta^4 apache
- "jakarta apache"^4 "Apache Lucene"
- "jakarta apache" jakarta
- "jakarta apache" OR jakarta
- "jakarta apache" AND "Apache Lucene"
- +jakarta lucene
- "jakarta apache" NOT "Apache Lucene"
- "jakarta apache" -"Apache Lucene"
- (jakarta OR apache) AND website
- \(1+1\)\:2
- c\:\\windows
- (fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)
- (fieldX:xxxxx fieldy:xxxxxxxx)^2 AND (fieldx:the fieldy:foo)
- (fieldX:xxxxx~0.5 fieldy:xxxxxxxx)^2 AND (fieldx:the fieldy:foo)
- +term -term term
- foo:term AND field:anotherTerm
- germ term^2.0
- (term)^2.0
- (foo OR bar) AND (baz OR boo)
- +(apple \"steve jobs\") -(foo bar baz)
- +title:(dog OR cat) -author:\"bob dole\"
- a AND b
- +a +b
- (a AND b)
- c OR (a AND b)
- c (+a +b)
- a AND NOT b
- +a -b
- a AND -b
- a AND !b
- a && b
- a && ! b
- a OR b
- a b
- a || b
- a OR !b
- a -b
- a OR ! b
- a OR -b
- a - b
- a + b
- a ! b
- +foo:term +anotherterm
- hello
- term^2.0
- (germ term)^2.0
- term^2
- +(foo bar) +(baz boo)
- ((a OR b) AND NOT c) OR d
- (+(a b) -c) d
- field
- a&&b
- .NET
- term
- germ
- 3
- term 1.0 1 2
- term term1 term2
- term term term
- term*
- term*^2
- term*^2.0
- term~
- term~2.0
- term~0.7
- term~^3
- term~2.0^3.0
- term*germ
- term*germ^3
- term*germ^3.0
- term~1.1
- [A TO C]
- t*erm*
- *term*
- term term^3.0 term
- term stop^3.0 term
- term +stop term
- term -stop term
- drop AND (stop) AND roll
- +drop +roll
- term +(stop) term
- term -(stop) term
- drop AND stop AND roll
- term phrase term
- term (phrase1 phrase2) term
- term AND NOT phrase term
- +term -(phrase1 phrase2) term
- stop^3
- stop
- (stop)^3
- ((stop))^3
- (stop^3)
- ((stop)^3)
- (stop)
- ((stop))
- term +stop
- [ a TO z]
- [a TO z]
- [ a TO z ]
- { a TO z}
- {a TO z}
- { a TO z }
- { a TO z }^2.0
- {a TO z}^2.0
- [ a TO z] OR bar
- [a TO z] bar
- [ a TO z] AND bar
- +[a TO z] +bar
- ( bar blar { a TO z})
- bar blar {a TO z}
- gack ( bar blar { a TO z})
- gack (bar blar {a TO z})
- [* TO Z]
- [* TO z]
- [A TO *]
- [a TO *]
- [* TO *]
- [\* TO \*]
- \!blah
- \:blah
- blah
- \~blah
- \*blah
- a
- a-b:c
- a+b:c
- a\:b:c
- a\\b:c
- a:b-c
- a:b+c
- a:b\:c
- a:b\\c
- a:b-c*
- a:b+c*
- a:b\:c*
- a:b\\c*
- a:b-c~2.0
- a:b+c~2.0
- a:b\:c~
- a:b\\c~
- [a- TO a+]
- [ a\\ TO a\* ]
- c\:\\temp\\\~foo.txt
- abc
- XYZ
- (item:\\ item:ABCD\\)
- \*
- *
- \\
- \||
- \&&
- a\:b\:c
- a\\b\:c
- a\:b\\c
- a\:b\:c\*
- a\:b\\\\c\*
- a:b-c~
- a:b+c~
- a\:b\:c\~
- a\:b\\c\~
- +weltbank +worlbank
- +term +term +term
- term +term term
- term term +term
- term +term +term
- -term term term
- -term +term +term
- on
- on^1.0
- hello^2.0
- the^3
- the
- some phrase
- xunit~
- one two three
- A AND B OR C AND D
- +A +B +C +D
- foo:zoo*
- foo:zoo*^2
- zoo
- foo:*
- foo:*^2
- *:foo
- a:the OR a:foo
- a:woo OR a:the
- *:*
- (*:*)
- +*:* -*:*
- the wizard of ozzy
- """
-
-failtests = r"""
- # Failure tests
- field:term:with:colon some more terms
- (sub query)^5.0^2.0 plus more
- a:b:c
- a:b:c~
- a:b:c*
- a:b:c~2.0
- \+blah
- \-blah
- foo \|| bar
- foo \AND bar
- \a
- a\-b:c
- a\+b:c
- a\b:c
- a:b\-c
- a:b\+c
- a\-b\:c
- a\+b\:c
- a:b\c*
- a:b\-c~
- a:b\+c~
- a:b\c
- a:b\-c*
- a:b\+c*
- [ a\- TO a\+ ]
- [a\ TO a*]
- a\\\+b
- a\+b
- c:\temp\~foo.txt
- XY\
- a\u0062c
- a:b\c~2.0
- XY\u005a
- XY\u005A
- item:\ item:ABCD\
- \
- a\ or b
- a\:b\-c
- a\:b\+c
- a\:b\-c\*
- a\:b\+c\*
- a\:b\-c\~
- a\:b\+c\~
- a:b\c~
- [ a\ TO a* ]
- """
-
-success1, _ = expression.runTests(tests)
-success2, _ = expression.runTests(failtests, failureTests=True)
-
-print(("FAIL", "OK")[success1 and success2])
-
-if not (success1 and success2):
- raise Exception("failure in lucene grammar parser, check output")
+#
+# lucene_grammar.py
+#
+# Copyright 2011, Paul McGuire
+#
+# implementation of Lucene grammar, as described
+# at http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/docs/queryparsersyntax.html
+#
+
+import pyparsing as pp
+from pyparsing import pyparsing_common as ppc
+pp.ParserElement.enablePackrat()
+
+COLON,LBRACK,RBRACK,LBRACE,RBRACE,TILDE,CARAT = map(pp.Literal,":[]{}~^")
+LPAR,RPAR = map(pp.Suppress,"()")
+and_, or_, not_, to_ = map(pp.CaselessKeyword, "AND OR NOT TO".split())
+keyword = and_ | or_ | not_ | to_
+
+expression = pp.Forward()
+
+valid_word = pp.Regex(r'([a-zA-Z0-9*_+.-]|\\\\|\\([+\-!(){}\[\]^"~*?:]|\|\||&&))+').setName("word")
+valid_word.setParseAction(
+ lambda t : t[0].replace('\\\\',chr(127)).replace('\\','').replace(chr(127),'\\')
+ )
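+# the parse action above un-escapes single-character escapes while preserving
+# literal backslashes (chr(127) is used as a temporary stand-in for '\\')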
+
+string = pp.QuotedString('"')
+
+required_modifier = pp.Literal("+")("required")
+prohibit_modifier = pp.Literal("-")("prohibit")
+integer = ppc.integer()
+proximity_modifier = pp.Group(TILDE + integer("proximity"))
+number = ppc.fnumber()
+fuzzy_modifier = TILDE + pp.Optional(number, default=0.5)("fuzzy")
+
+term = pp.Forward()
+field_name = valid_word().setName("fieldname")
+incl_range_search = pp.Group(LBRACK - term("lower") + to_ + term("upper") + RBRACK)
+excl_range_search = pp.Group(LBRACE - term("lower") + to_ + term("upper") + RBRACE)
+range_search = incl_range_search("incl_range") | excl_range_search("excl_range")
+boost = (CARAT - number("boost"))
+
+string_expr = pp.Group(string + proximity_modifier) | string
+word_expr = pp.Group(valid_word + fuzzy_modifier) | valid_word
+term << (pp.Optional(field_name("field") + COLON)
+ + (word_expr | string_expr | range_search | pp.Group(LPAR + expression + RPAR))
+ + pp.Optional(boost))
+term.setParseAction(lambda t:[t] if 'field' in t or 'boost' in t else None)
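+# the parse action above wraps a term in its own sub-group when it carries a
+# field or boost, so those named results survive the infixNotation grouping below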
+
+expression << pp.infixNotation(term,
+ [
+ (required_modifier | prohibit_modifier, 1, pp.opAssoc.RIGHT),
+ ((not_ | '!').setParseAction(lambda: "NOT"), 1, pp.opAssoc.RIGHT),
+ ((and_ | '&&').setParseAction(lambda: "AND"), 2, pp.opAssoc.LEFT),
+ (pp.Optional(or_ | '||').setParseAction(lambda: "OR"), 2, pp.opAssoc.LEFT),
+ ])
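+
+# quick usage sketch: expression.parseString('title:"The Right Way" AND text:go')
+# returns a nested ParseResults grouped by operator precedence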
+
+if __name__ == '__main__':
+
+ # test strings taken from grammar description doc, and TestQueryParser.java
+ tests = r"""
+ # Success tests
+ a and b
+ a and not b
+ a and !b
+ a && !b
+ a&&!b
+ name:a
+ name:a and not title:b
+ (a^100 c d f) and !z
+ name:"blah de blah"
+ title:(+return +"pink panther")
+ title:"The Right Way" AND text:go
+ title:"Do it right" AND right
+ title:Do it right
+ roam~
+ roam~0.8
+ "jakarta apache"~10
+ mod_date:[20020101 TO 20030101]
+ title:{Aida TO Carmen}
+ jakarta apache
+ jakarta^4 apache
+ "jakarta apache"^4 "Apache Lucene"
+ "jakarta apache" jakarta
+ "jakarta apache" OR jakarta
+ "jakarta apache" AND "Apache Lucene"
+ +jakarta lucene
+ "jakarta apache" NOT "Apache Lucene"
+ "jakarta apache" -"Apache Lucene"
+ (jakarta OR apache) AND website
+ \(1+1\)\:2
+ c\:\\windows
+ (fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)
+ (fieldX:xxxxx fieldy:xxxxxxxx)^2 AND (fieldx:the fieldy:foo)
+ (fieldX:xxxxx~0.5 fieldy:xxxxxxxx)^2 AND (fieldx:the fieldy:foo)
+ +term -term term
+ foo:term AND field:anotherTerm
+ germ term^2.0
+ (term)^2.0
+ (foo OR bar) AND (baz OR boo)
+ +(apple \"steve jobs\") -(foo bar baz)
+ +title:(dog OR cat) -author:\"bob dole\"
+ a AND b
+ +a +b
+ (a AND b)
+ c OR (a AND b)
+ c (+a +b)
+ a AND NOT b
+ +a -b
+ a AND -b
+ a AND !b
+ a && b
+ a && ! b
+ a OR b
+ a b
+ a || b
+ a OR !b
+ a -b
+ a OR ! b
+ a OR -b
+ a - b
+ a + b
+ a ! b
+ +foo:term +anotherterm
+ hello
+ term^2.0
+ (germ term)^2.0
+ term^2
+ +(foo bar) +(baz boo)
+ ((a OR b) AND NOT c) OR d
+ (+(a b) -c) d
+ field
+ a&&b
+ .NET
+ term
+ germ
+ 3
+ term 1.0 1 2
+ term term1 term2
+ term term term
+ term*
+ term*^2
+ term*^2.0
+ term~
+ term~2.0
+ term~0.7
+ term~^3
+ term~2.0^3.0
+ term*germ
+ term*germ^3
+ term*germ^3.0
+ term~1.1
+ [A TO C]
+ t*erm*
+ *term*
+ term term^3.0 term
+ term stop^3.0 term
+ term +stop term
+ term -stop term
+ drop AND (stop) AND roll
+ +drop +roll
+ term +(stop) term
+ term -(stop) term
+ drop AND stop AND roll
+ term phrase term
+ term (phrase1 phrase2) term
+ term AND NOT phrase term
+ +term -(phrase1 phrase2) term
+ stop^3
+ stop
+ (stop)^3
+ ((stop))^3
+ (stop^3)
+ ((stop)^3)
+ (stop)
+ ((stop))
+ term +stop
+ [ a TO z]
+ [a TO z]
+ [ a TO z ]
+ { a TO z}
+ {a TO z}
+ { a TO z }
+ { a TO z }^2.0
+ {a TO z}^2.0
+ [ a TO z] OR bar
+ [a TO z] bar
+ [ a TO z] AND bar
+ +[a TO z] +bar
+ ( bar blar { a TO z})
+ bar blar {a TO z}
+ gack ( bar blar { a TO z})
+ gack (bar blar {a TO z})
+ [* TO Z]
+ [* TO z]
+ [A TO *]
+ [a TO *]
+ [* TO *]
+ [\* TO \*]
+ \!blah
+ \:blah
+ blah
+ \~blah
+ \*blah
+ a
+ a-b:c
+ a+b:c
+ a\:b:c
+ a\\b:c
+ a:b-c
+ a:b+c
+ a:b\:c
+ a:b\\c
+ a:b-c*
+ a:b+c*
+ a:b\:c*
+ a:b\\c*
+ a:b-c~2.0
+ a:b+c~2.0
+ a:b\:c~
+ a:b\\c~
+ [a- TO a+]
+ [ a\\ TO a\* ]
+ c\:\\temp\\\~foo.txt
+ abc
+ XYZ
+ (item:\\ item:ABCD\\)
+ \*
+ *
+ \\
+ \||
+ \&&
+ a\:b\:c
+ a\\b\:c
+ a\:b\\c
+ a\:b\:c\*
+ a\:b\\\\c\*
+ a:b-c~
+ a:b+c~
+ a\:b\:c\~
+ a\:b\\c\~
+ +weltbank +worlbank
+ +term +term +term
+ term +term term
+ term term +term
+ term +term +term
+ -term term term
+ -term +term +term
+ on
+ on^1.0
+ hello^2.0
+ the^3
+ the
+ some phrase
+ xunit~
+ one two three
+ A AND B OR C AND D
+ +A +B +C +D
+ foo:zoo*
+ foo:zoo*^2
+ zoo
+ foo:*
+ foo:*^2
+ *:foo
+ a:the OR a:foo
+ a:woo OR a:the
+ *:*
+ (*:*)
+ +*:* -*:*
+ the wizard of ozzy
+ """
+
+ failtests = r"""
+ # Failure tests
+
+ # multiple ':'s in term
+ field:term:with:colon some more terms
+
+ # multiple '^'s in term
+ (sub query)^5.0^2.0 plus more
+ a:b:c
+ a:b:c~
+ a:b:c*
+ a:b:c~2.0
+ \+blah
+ \-blah
+ foo \|| bar
+ foo \AND bar
+ \a
+ a\-b:c
+ a\+b:c
+ a\b:c
+ a:b\-c
+ a:b\+c
+ a\-b\:c
+ a\+b\:c
+ a:b\c*
+ a:b\-c~
+ a:b\+c~
+ a:b\c
+ a:b\-c*
+ a:b\+c*
+ [ a\- TO a\+ ]
+ [a\ TO a*]
+ a\\\+b
+ a\+b
+ c:\temp\~foo.txt
+ XY\
+ a\u0062c
+ a:b\c~2.0
+ XY\u005a
+ XY\u005A
+ item:\ item:ABCD\
+ \
+ a\ or b
+ a\:b\-c
+ a\:b\+c
+ a\:b\-c\*
+ a\:b\+c\*
+ a\:b\-c\~
+ a\:b\+c\~
+ a:b\c~
+ [ a\ TO a* ]
+ """
+
+ success1, _ = expression.runTests(tests)
+ success2, _ = expression.runTests(failtests, failureTests=True)
+
+ print("All tests:", ("FAIL", "OK")[success1 and success2])
+
+ if not (success1 and success2):
+ import sys
+ sys.exit(1)
diff --git a/examples/protobuf_parser.py b/examples/protobuf_parser.py
index 68a8f63..0b3e909 100644
--- a/examples/protobuf_parser.py
+++ b/examples/protobuf_parser.py
@@ -1,100 +1,100 @@
-# protobuf_parser.py
-#
-# simple parser for parsing protobuf .proto files
-#
-# Copyright 2010, Paul McGuire
-#
-
-from pyparsing import (Word, alphas, alphanums, Regex, Suppress, Forward,
- Group, oneOf, ZeroOrMore, Optional, delimitedList,
- restOfLine, quotedString, Dict)
-
-ident = Word(alphas+"_",alphanums+"_").setName("identifier")
-integer = Regex(r"[+-]?\d+")
-
-LBRACE,RBRACE,LBRACK,RBRACK,LPAR,RPAR,EQ,SEMI = map(Suppress,"{}[]()=;")
-
-kwds = """message required optional repeated enum extensions extends extend
- to package service rpc returns true false option import"""
-for kw in kwds.split():
- exec("{0}_ = Keyword('{1}')".format(kw.upper(), kw))
-
-messageBody = Forward()
-
-messageDefn = MESSAGE_ - ident("messageId") + LBRACE + messageBody("body") + RBRACE
-
-typespec = oneOf("""double float int32 int64 uint32 uint64 sint32 sint64
- fixed32 fixed64 sfixed32 sfixed64 bool string bytes""") | ident
-rvalue = integer | TRUE_ | FALSE_ | ident
-fieldDirective = LBRACK + Group(ident + EQ + rvalue) + RBRACK
-fieldDefn = (( REQUIRED_ | OPTIONAL_ | REPEATED_ )("fieldQualifier") -
- typespec("typespec") + ident("ident") + EQ + integer("fieldint") + ZeroOrMore(fieldDirective) + SEMI)
-
-# enumDefn ::= 'enum' ident '{' { ident '=' integer ';' }* '}'
-enumDefn = ENUM_("typespec") - ident('name') + LBRACE + Dict( ZeroOrMore( Group(ident + EQ + integer + SEMI) ))('values') + RBRACE
-
-# extensionsDefn ::= 'extensions' integer 'to' integer ';'
-extensionsDefn = EXTENSIONS_ - integer + TO_ + integer + SEMI
-
-# messageExtension ::= 'extend' ident '{' messageBody '}'
-messageExtension = EXTEND_ - ident + LBRACE + messageBody + RBRACE
-
-# messageBody ::= { fieldDefn | enumDefn | messageDefn | extensionsDefn | messageExtension }*
-messageBody << Group(ZeroOrMore( Group(fieldDefn | enumDefn | messageDefn | extensionsDefn | messageExtension) ))
-
-# methodDefn ::= 'rpc' ident '(' [ ident ] ')' 'returns' '(' [ ident ] ')' ';'
-methodDefn = (RPC_ - ident("methodName") +
- LPAR + Optional(ident("methodParam")) + RPAR +
- RETURNS_ + LPAR + Optional(ident("methodReturn")) + RPAR)
-
-# serviceDefn ::= 'service' ident '{' methodDefn* '}'
-serviceDefn = SERVICE_ - ident("serviceName") + LBRACE + ZeroOrMore(Group(methodDefn)) + RBRACE
-
-# packageDirective ::= 'package' ident [ '.' ident]* ';'
-packageDirective = Group(PACKAGE_ - delimitedList(ident, '.', combine=True) + SEMI)
-
-comment = '//' + restOfLine
-
-importDirective = IMPORT_ - quotedString("importFileSpec") + SEMI
-
-optionDirective = OPTION_ - ident("optionName") + EQ + quotedString("optionValue") + SEMI
-
-topLevelStatement = Group(messageDefn | messageExtension | enumDefn | serviceDefn | importDirective | optionDirective)
-
-parser = Optional(packageDirective) + ZeroOrMore(topLevelStatement)
-
-parser.ignore(comment)
-
-
-test1 = """message Person {
- required int32 id = 1;
- required string name = 2;
- optional string email = 3;
-}"""
-
-test2 = """package tutorial;
-
-message Person {
- required string name = 1;
- required int32 id = 2;
- optional string email = 3;
-
- enum PhoneType {
- MOBILE = 0;
- HOME = 1;
- WORK = 2;
- }
-
- message PhoneNumber {
- required string number = 1;
- optional PhoneType type = 2 [default = HOME];
- }
-
- repeated PhoneNumber phone = 4;
-}
-
-message AddressBook {
- repeated Person person = 1;
-}"""
-
-parser.runTests([test1, test2])
+# protobuf_parser.py
+#
+# simple parser for parsing protobuf .proto files
+#
+# Copyright 2010, Paul McGuire
+#
+
+from pyparsing import (Word, alphas, alphanums, Regex, Suppress, Forward,
+ Keyword, Group, oneOf, ZeroOrMore, Optional, delimitedList,
+ restOfLine, quotedString, Dict)
+
+ident = Word(alphas+"_",alphanums+"_").setName("identifier")
+integer = Regex(r"[+-]?\d+")
+
+LBRACE,RBRACE,LBRACK,RBRACK,LPAR,RPAR,EQ,SEMI = map(Suppress,"{}[]()=;")
+
+kwds = """message required optional repeated enum extensions extends extend
+ to package service rpc returns true false option import"""
+for kw in kwds.split():
+ exec("{0}_ = Keyword('{1}')".format(kw.upper(), kw))
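+# (the loop above defines Keyword expressions MESSAGE_, REQUIRED_, OPTIONAL_, etc.,
+#  one for each protobuf keyword listed in kwds)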
+
+messageBody = Forward()
+
+messageDefn = MESSAGE_ - ident("messageId") + LBRACE + messageBody("body") + RBRACE
+
+typespec = oneOf("""double float int32 int64 uint32 uint64 sint32 sint64
+ fixed32 fixed64 sfixed32 sfixed64 bool string bytes""") | ident
+rvalue = integer | TRUE_ | FALSE_ | ident
+fieldDirective = LBRACK + Group(ident + EQ + rvalue) + RBRACK
+fieldDefn = (( REQUIRED_ | OPTIONAL_ | REPEATED_ )("fieldQualifier") -
+ typespec("typespec") + ident("ident") + EQ + integer("fieldint") + ZeroOrMore(fieldDirective) + SEMI)
+
+# enumDefn ::= 'enum' ident '{' { ident '=' integer ';' }* '}'
+enumDefn = ENUM_("typespec") - ident('name') + LBRACE + Dict( ZeroOrMore( Group(ident + EQ + integer + SEMI) ))('values') + RBRACE
+
+# extensionsDefn ::= 'extensions' integer 'to' integer ';'
+extensionsDefn = EXTENSIONS_ - integer + TO_ + integer + SEMI
+
+# messageExtension ::= 'extend' ident '{' messageBody '}'
+messageExtension = EXTEND_ - ident + LBRACE + messageBody + RBRACE
+
+# messageBody ::= { fieldDefn | enumDefn | messageDefn | extensionsDefn | messageExtension }*
+messageBody << Group(ZeroOrMore( Group(fieldDefn | enumDefn | messageDefn | extensionsDefn | messageExtension) ))
+
+# methodDefn ::= 'rpc' ident '(' [ ident ] ')' 'returns' '(' [ ident ] ')' ';'
+methodDefn = (RPC_ - ident("methodName") +
+ LPAR + Optional(ident("methodParam")) + RPAR +
+ RETURNS_ + LPAR + Optional(ident("methodReturn")) + RPAR)
+
+# serviceDefn ::= 'service' ident '{' methodDefn* '}'
+serviceDefn = SERVICE_ - ident("serviceName") + LBRACE + ZeroOrMore(Group(methodDefn)) + RBRACE
+
+# packageDirective ::= 'package' ident [ '.' ident]* ';'
+packageDirective = Group(PACKAGE_ - delimitedList(ident, '.', combine=True) + SEMI)
+
+comment = '//' + restOfLine
+
+importDirective = IMPORT_ - quotedString("importFileSpec") + SEMI
+
+optionDirective = OPTION_ - ident("optionName") + EQ + quotedString("optionValue") + SEMI
+
+topLevelStatement = Group(messageDefn | messageExtension | enumDefn | serviceDefn | importDirective | optionDirective)
+
+parser = Optional(packageDirective) + ZeroOrMore(topLevelStatement)
+
+parser.ignore(comment)
+
+
+test1 = """message Person {
+ required int32 id = 1;
+ required string name = 2;
+ optional string email = 3;
+}"""
+
+test2 = """package tutorial;
+
+message Person {
+ required string name = 1;
+ required int32 id = 2;
+ optional string email = 3;
+
+ enum PhoneType {
+ MOBILE = 0;
+ HOME = 1;
+ WORK = 2;
+ }
+
+ message PhoneNumber {
+ required string number = 1;
+ optional PhoneType type = 2 [default = HOME];
+ }
+
+ repeated PhoneNumber phone = 4;
+}
+
+message AddressBook {
+ repeated Person person = 1;
+}"""
+
+parser.runTests([test1, test2])
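+
+# the parsed structure can also be inspected directly, e.g. (a sketch):
+#   result = parser.parseString(test2)
+#   print(result.dump())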
diff --git a/examples/removeLineBreaks.py b/examples/removeLineBreaks.py
index df07fba..84bd33e 100644
--- a/examples/removeLineBreaks.py
+++ b/examples/removeLineBreaks.py
@@ -1,45 +1,45 @@
-# removeLineBreaks.py
-#
-# Demonstration of the pyparsing module, converting text files
-# with hard line-breaks to text files with line breaks only
-# between paragraphs. (Helps when converting downloads from Project
-# Gutenberg - https://www.gutenberg.org/ - to import to word processing apps
-# that can reformat paragraphs once hard line-breaks are removed.)
-#
-# Uses parse actions and transformString to remove unwanted line breaks,
-# and to double up line breaks between paragraphs.
-#
-# Copyright 2006, by Paul McGuire
-#
-from pyparsing import *
-
-# define an expression for the body of a line of text - use a parse action to reject any
-# empty lines
-def mustBeNonBlank(s,l,t):
- if not t[0]:
- raise ParseException(s,l,"line body can't be empty")
-lineBody = SkipTo(lineEnd).setParseAction(mustBeNonBlank)
-
-# now define a line with a trailing lineEnd, to be replaced with a space character
-textLine = lineBody + Suppress(lineEnd).setParseAction(replaceWith(" "))
-
-# define a paragraph, with a separating lineEnd, to be replaced with a double newline
-para = OneOrMore(textLine) + Suppress(lineEnd).setParseAction(replaceWith("\n\n"))
-
-
-# run a test
-test = """
- Now is the
- time for
- all
- good men
- to come to
-
- the aid of their
- country.
-"""
-print(para.transformString(test))
-
-# process an entire file
-z = para.transformString(file("Successful Methods of Public Speaking.txt").read())
-file("Successful Methods of Public Speaking(2).txt","w").write(z)
+# removeLineBreaks.py
+#
+# Demonstration of the pyparsing module, converting text files
+# with hard line-breaks to text files with line breaks only
+# between paragraphs. (Helps when converting downloads from Project
+# Gutenberg - https://www.gutenberg.org/ - to import to word processing apps
+# that can reformat paragraphs once hard line-breaks are removed.)
+#
+# Uses parse actions and transformString to remove unwanted line breaks,
+# and to double up line breaks between paragraphs.
+#
+# Copyright 2006, by Paul McGuire
+#
+from pyparsing import *
+
+# define an expression for the body of a line of text - use a parse action to reject any
+# empty lines
+def mustBeNonBlank(s,l,t):
+ if not t[0]:
+ raise ParseException(s,l,"line body can't be empty")
+lineBody = SkipTo(lineEnd).setParseAction(mustBeNonBlank)
+
+# now define a line with a trailing lineEnd, to be replaced with a space character
+textLine = lineBody + Suppress(lineEnd).setParseAction(replaceWith(" "))
+
+# define a paragraph, with a separating lineEnd, to be replaced with a double newline
+para = OneOrMore(textLine) + Suppress(lineEnd).setParseAction(replaceWith("\n\n"))
+
+
+# run a test
+test = """
+ Now is the
+ time for
+ all
+ good men
+ to come to
+
+ the aid of their
+ country.
+"""
+print(para.transformString(test))
+
+# process an entire file
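+# (assumes a local plain-text file with this name, e.g. a Project Gutenberg download)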
+with open("Successful Methods of Public Speaking.txt") as infile:
+    z = para.transformString(infile.read())
+with open("Successful Methods of Public Speaking(2).txt", "w") as outfile:
+    outfile.write(z)