diff options
author | Paul McGuire <ptmcg@austin.rr.com> | 2019-07-23 21:18:44 -0500 |
---|---|---|
committer | Paul McGuire <ptmcg@austin.rr.com> | 2019-07-23 21:18:44 -0500 |
commit | a64494dd3ce957c5bdd9ec2d7114c9de88af7c28 (patch) | |
tree | 8a409254f69b4adce2fd3f5357621a000f7b4a53 /examples | |
parent | 07d82bb767d75a0a30bd6f806938c1f9fe50d8ee (diff) | |
download | pyparsing-git-a64494dd3ce957c5bdd9ec2d7114c9de88af7c28.tar.gz |
Version 2.4.2a1 - change the `expr[...]` repetition notation to mean ZeroOrMore instead of OneOrMore (examples that need one-or-more now use `expr[1, ...]`)
Diffstat (limited to 'examples')
-rw-r--r-- | examples/sexpParser.py | 314 | ||||
-rw-r--r-- | examples/wordsToNum.py | 220 |
2 files changed, 267 insertions, 267 deletions
diff --git a/examples/sexpParser.py b/examples/sexpParser.py index fd8ffd3..0d006d2 100644 --- a/examples/sexpParser.py +++ b/examples/sexpParser.py @@ -1,157 +1,157 @@ -# sexpParser.py
-#
-# Demonstration of the pyparsing module, implementing a simple S-expression
-# parser.
-#
-# Updates:
-# November, 2011 - fixed errors in precedence of alternatives in simpleString;
-# fixed exception raised in verifyLen to properly signal the input string
-# and exception location so that markInputline works correctly; fixed
-# definition of decimal to accept a single '0' and optional leading '-'
-# sign; updated tests to improve parser coverage
-#
-# Copyright 2007-2011, by Paul McGuire
-#
-"""
-BNF reference: http://theory.lcs.mit.edu/~rivest/sexp.txt
-
-<sexp> :: <string> | <list>
-<string> :: <display>? <simple-string> ;
-<simple-string> :: <raw> | <token> | <base-64> | <hexadecimal> |
- <quoted-string> ;
-<display> :: "[" <simple-string> "]" ;
-<raw> :: <decimal> ":" <bytes> ;
-<decimal> :: <decimal-digit>+ ;
- -- decimal numbers should have no unnecessary leading zeros
-<bytes> -- any string of bytes, of the indicated length
-<token> :: <tokenchar>+ ;
-<base-64> :: <decimal>? "|" ( <base-64-char> | <whitespace> )* "|" ;
-<hexadecimal> :: "#" ( <hex-digit> | <white-space> )* "#" ;
-<quoted-string> :: <decimal>? <quoted-string-body>
-<quoted-string-body> :: "\"" <bytes> "\""
-<list> :: "(" ( <sexp> | <whitespace> )* ")" ;
-<whitespace> :: <whitespace-char>* ;
-<token-char> :: <alpha> | <decimal-digit> | <simple-punc> ;
-<alpha> :: <upper-case> | <lower-case> | <digit> ;
-<lower-case> :: "a" | ... | "z" ;
-<upper-case> :: "A" | ... | "Z" ;
-<decimal-digit> :: "0" | ... | "9" ;
-<hex-digit> :: <decimal-digit> | "A" | ... | "F" | "a" | ... | "f" ;
-<simple-punc> :: "-" | "." | "/" | "_" | ":" | "*" | "+" | "=" ;
-<whitespace-char> :: " " | "\t" | "\r" | "\n" ;
-<base-64-char> :: <alpha> | <decimal-digit> | "+" | "/" | "=" ;
-<null> :: "" ;
-"""
-
-import pyparsing as pp
-from base64 import b64decode
-import pprint
-
-
-def verify_length(s, l, t):
- t = t[0]
- if t.len is not None:
- t1len = len(t[1])
- if t1len != t.len:
- raise pp.ParseFatalException(s, l, "invalid data of length {0}, expected {1}".format(t1len, t.len))
- return t[1]
-
-
-# define punctuation literals
-LPAR, RPAR, LBRK, RBRK, LBRC, RBRC, VBAR, COLON = (pp.Suppress(c).setName(c) for c in "()[]{}|:")
-
-decimal = pp.Regex(r'-?0|[1-9]\d*').setParseAction(lambda t: int(t[0]))
-hexadecimal = ("#" + pp.Word(pp.hexnums)[...] + "#").setParseAction(lambda t: int("".join(t[1:-1]), 16))
-bytes = pp.Word(pp.printables)
-raw = pp.Group(decimal("len") + COLON + bytes).setParseAction(verify_length)
-base64_ = pp.Group(pp.Optional(decimal | hexadecimal, default=None)("len")
- + VBAR
- + pp.Word(pp.alphanums + "+/=")[...].setParseAction(lambda t: b64decode("".join(t)))
- + VBAR
- ).setParseAction(verify_length)
-
-real = pp.Regex(r"[+-]?\d+\.\d*([eE][+-]?\d+)?").setParseAction(lambda tokens: float(tokens[0]))
-token = pp.Word(pp.alphanums + "-./_:*+=!<>")
-qString = pp.Group(pp.Optional(decimal, default=None)("len")
- + pp.dblQuotedString.setParseAction(pp.removeQuotes)
- ).setParseAction(verify_length)
-
-simpleString = real | base64_ | raw | decimal | token | hexadecimal | qString
-
-display = LBRK + simpleString + RBRK
-string_ = pp.Optional(display) + simpleString
-
-sexp = pp.Forward()
-sexpList = pp.Group(LPAR + sexp[0, ...] + RPAR)
-sexp <<= string_ | sexpList
-
-
-# Test data
-
-test00 = """(snicker "abc" (#03# |YWJj|))"""
-test01 = """(certificate
- (issuer
- (name
- (public-key
- rsa-with-md5
- (e 15 |NFGq/E3wh9f4rJIQVXhS|)
- (n |d738/4ghP9rFZ0gAIYZ5q9y6iskDJwASi5rEQpEQq8ZyMZeIZzIAR2I5iGE=|))
- aid-committee))
- (subject
- (ref
- (public-key
- rsa-with-md5
- (e |NFGq/E3wh9f4rJIQVXhS|)
- (n |d738/4ghP9rFZ0gAIYZ5q9y6iskDJwASi5rEQpEQq8ZyMZeIZzIAR2I5iGE=|))
- tom
- mother))
- (not-before "1997-01-01_09:00:00")
- (not-after "1998-01-01_09:00:00")
- (tag
- (spend (account "12345678") (* numeric range "1" "1000"))))
-"""
-test02 = """(lambda (x) (* x x))"""
-test03 = """(def length
- (lambda (x)
- (cond
- ((not x) 0)
- ( t (+ 1 (length (cdr x))))
- )
- )
-)
-"""
-test04 = """(2:XX "abc" (#03# |YWJj|))"""
-test05 = """(if (is (window_name) "XMMS") (set_workspace 2))"""
-test06 = """(if
- (and
- (is (application_name) "Firefox")
- (or
- (contains (window_name) "Enter name of file to save to")
- (contains (window_name) "Save As")
- (contains (window_name) "Save Image")
- ()
- )
- )
- (geometry "+140+122")
-)
-"""
-test07 = """(defun factorial (x)
- (if (zerop x) 1
- (* x (factorial (- x 1)))))
- """
-test51 = """(2:XX "abc" (#03# |YWJj|))"""
-test51error = """(3:XX "abc" (#03# |YWJj|))"""
-
-test52 = """
- (and
- (or (> uid 1000)
- (!= gid 20)
- )
- (> quota 5.0e+03)
- )
- """
-
-# Run tests
-alltests = [globals()[testname] for testname in sorted(locals()) if testname.startswith("test")]
-
-sexp.runTests(alltests, fullDump=False)
+# sexpParser.py +# +# Demonstration of the pyparsing module, implementing a simple S-expression +# parser. +# +# Updates: +# November, 2011 - fixed errors in precedence of alternatives in simpleString; +# fixed exception raised in verifyLen to properly signal the input string +# and exception location so that markInputline works correctly; fixed +# definition of decimal to accept a single '0' and optional leading '-' +# sign; updated tests to improve parser coverage +# +# Copyright 2007-2011, by Paul McGuire +# +""" +BNF reference: http://theory.lcs.mit.edu/~rivest/sexp.txt + +<sexp> :: <string> | <list> +<string> :: <display>? <simple-string> ; +<simple-string> :: <raw> | <token> | <base-64> | <hexadecimal> | + <quoted-string> ; +<display> :: "[" <simple-string> "]" ; +<raw> :: <decimal> ":" <bytes> ; +<decimal> :: <decimal-digit>+ ; + -- decimal numbers should have no unnecessary leading zeros +<bytes> -- any string of bytes, of the indicated length +<token> :: <tokenchar>+ ; +<base-64> :: <decimal>? "|" ( <base-64-char> | <whitespace> )* "|" ; +<hexadecimal> :: "#" ( <hex-digit> | <white-space> )* "#" ; +<quoted-string> :: <decimal>? <quoted-string-body> +<quoted-string-body> :: "\"" <bytes> "\"" +<list> :: "(" ( <sexp> | <whitespace> )* ")" ; +<whitespace> :: <whitespace-char>* ; +<token-char> :: <alpha> | <decimal-digit> | <simple-punc> ; +<alpha> :: <upper-case> | <lower-case> | <digit> ; +<lower-case> :: "a" | ... | "z" ; +<upper-case> :: "A" | ... | "Z" ; +<decimal-digit> :: "0" | ... | "9" ; +<hex-digit> :: <decimal-digit> | "A" | ... | "F" | "a" | ... | "f" ; +<simple-punc> :: "-" | "." 
| "/" | "_" | ":" | "*" | "+" | "=" ; +<whitespace-char> :: " " | "\t" | "\r" | "\n" ; +<base-64-char> :: <alpha> | <decimal-digit> | "+" | "/" | "=" ; +<null> :: "" ; +""" + +import pyparsing as pp +from base64 import b64decode +import pprint + + +def verify_length(s, l, t): + t = t[0] + if t.len is not None: + t1len = len(t[1]) + if t1len != t.len: + raise pp.ParseFatalException(s, l, "invalid data of length {0}, expected {1}".format(t1len, t.len)) + return t[1] + + +# define punctuation literals +LPAR, RPAR, LBRK, RBRK, LBRC, RBRC, VBAR, COLON = (pp.Suppress(c).setName(c) for c in "()[]{}|:") + +decimal = pp.Regex(r'-?0|[1-9]\d*').setParseAction(lambda t: int(t[0])) +hexadecimal = ("#" + pp.Word(pp.hexnums)[1, ...] + "#").setParseAction(lambda t: int("".join(t[1:-1]), 16)) +bytes = pp.Word(pp.printables) +raw = pp.Group(decimal("len") + COLON + bytes).setParseAction(verify_length) +base64_ = pp.Group(pp.Optional(decimal | hexadecimal, default=None)("len") + + VBAR + + pp.Word(pp.alphanums + "+/=")[1, ...].setParseAction(lambda t: b64decode("".join(t))) + + VBAR + ).setParseAction(verify_length) + +real = pp.Regex(r"[+-]?\d+\.\d*([eE][+-]?\d+)?").setParseAction(lambda tokens: float(tokens[0])) +token = pp.Word(pp.alphanums + "-./_:*+=!<>") +qString = pp.Group(pp.Optional(decimal, default=None)("len") + + pp.dblQuotedString.setParseAction(pp.removeQuotes) + ).setParseAction(verify_length) + +simpleString = real | base64_ | raw | decimal | token | hexadecimal | qString + +display = LBRK + simpleString + RBRK +string_ = pp.Optional(display) + simpleString + +sexp = pp.Forward() +sexpList = pp.Group(LPAR + sexp[...] 
+ RPAR) +sexp <<= string_ | sexpList + + +# Test data + +test00 = """(snicker "abc" (#03# |YWJj|))""" +test01 = """(certificate + (issuer + (name + (public-key + rsa-with-md5 + (e 15 |NFGq/E3wh9f4rJIQVXhS|) + (n |d738/4ghP9rFZ0gAIYZ5q9y6iskDJwASi5rEQpEQq8ZyMZeIZzIAR2I5iGE=|)) + aid-committee)) + (subject + (ref + (public-key + rsa-with-md5 + (e |NFGq/E3wh9f4rJIQVXhS|) + (n |d738/4ghP9rFZ0gAIYZ5q9y6iskDJwASi5rEQpEQq8ZyMZeIZzIAR2I5iGE=|)) + tom + mother)) + (not-before "1997-01-01_09:00:00") + (not-after "1998-01-01_09:00:00") + (tag + (spend (account "12345678") (* numeric range "1" "1000")))) +""" +test02 = """(lambda (x) (* x x))""" +test03 = """(def length + (lambda (x) + (cond + ((not x) 0) + ( t (+ 1 (length (cdr x)))) + ) + ) +) +""" +test04 = """(2:XX "abc" (#03# |YWJj|))""" +test05 = """(if (is (window_name) "XMMS") (set_workspace 2))""" +test06 = """(if + (and + (is (application_name) "Firefox") + (or + (contains (window_name) "Enter name of file to save to") + (contains (window_name) "Save As") + (contains (window_name) "Save Image") + () + ) + ) + (geometry "+140+122") +) +""" +test07 = """(defun factorial (x) + (if (zerop x) 1 + (* x (factorial (- x 1))))) + """ +test51 = """(2:XX "abc" (#03# |YWJj|))""" +test51error = """(3:XX "abc" (#03# |YWJj|))""" + +test52 = """ + (and + (or (> uid 1000) + (!= gid 20) + ) + (> quota 5.0e+03) + ) + """ + +# Run tests +alltests = [globals()[testname] for testname in sorted(locals()) if testname.startswith("test")] + +sexp.runTests(alltests, fullDump=False) diff --git a/examples/wordsToNum.py b/examples/wordsToNum.py index d9511da..71538ba 100644 --- a/examples/wordsToNum.py +++ b/examples/wordsToNum.py @@ -1,110 +1,110 @@ -# wordsToNum.py
-# Copyright 2006, Paul McGuire
-#
-# Sample parser grammar to read a number given in words, and return the numeric value.
-#
-import pyparsing as pp
-from operator import mul
-from functools import reduce
-
-def makeLit(s, val):
- ret = pp.CaselessLiteral(s)
- return ret.setParseAction(pp.replaceWith(val))
-
-unitDefinitions = [
- ("zero", 0),
- ("oh", 0),
- ("zip", 0),
- ("zilch", 0),
- ("nada", 0),
- ("bupkis", 0),
- ("one", 1),
- ("two", 2),
- ("three", 3),
- ("four", 4),
- ("five", 5),
- ("six", 6),
- ("seven", 7),
- ("eight", 8),
- ("nine", 9),
- ("ten", 10),
- ("eleven", 11),
- ("twelve", 12),
- ("thirteen", 13),
- ("fourteen", 14),
- ("fifteen", 15),
- ("sixteen", 16),
- ("seventeen", 17),
- ("eighteen", 18),
- ("nineteen", 19),
- ]
-units = pp.MatchFirst(makeLit(s,v) for s,v in sorted(unitDefinitions, key=lambda d: -len(d[0])))
-
-tensDefinitions = [
- ("ten", 10),
- ("twenty", 20),
- ("thirty", 30),
- ("forty", 40),
- ("fourty", 40), # for the spelling-challenged...
- ("fifty", 50),
- ("sixty", 60),
- ("seventy", 70),
- ("eighty", 80),
- ("ninety", 90),
- ]
-tens = pp.MatchFirst(makeLit(s,v) for s,v in tensDefinitions)
-
-hundreds = makeLit("hundred", 100)
-
-majorDefinitions = [
- ("thousand", int(1e3)),
- ("million", int(1e6)),
- ("billion", int(1e9)),
- ("trillion", int(1e12)),
- ("quadrillion", int(1e15)),
- ("quintillion", int(1e18)),
- ]
-mag = pp.MatchFirst(makeLit(s,v) for s,v in majorDefinitions)
-
-wordprod = lambda t: reduce(mul,t)
-numPart = ((((units + pp.Optional(hundreds)).setParseAction(wordprod)
- + pp.Optional(tens)
- ).setParseAction(sum)
- ^ tens)
- + pp.Optional(units)
- ).setParseAction(sum)
-numWords = ((numPart + pp.Optional(mag)).setParseAction(wordprod)[...]).setParseAction(sum)
-numWords.setName("num word parser")
-
-numWords.ignore(pp.Literal("-"))
-numWords.ignore(pp.CaselessLiteral("and"))
-
-tests = """
- one hundred twenty hundred, None
- one hundred and twennty, None
- one hundred and twenty, 120
- one hundred and three, 103
- one hundred twenty-three, 123
- one hundred and twenty three, 123
- one hundred twenty three million, 123000000
- one hundred and twenty three million, 123000000
- one hundred twenty three million and three, 123000003
- fifteen hundred and sixty five, 1565
- seventy-seven thousand eight hundred and nineteen, 77819
- seven hundred seventy-seven thousand seven hundred and seventy-seven, 777777
- zero, 0
- forty two, 42
- fourty two, 42
-"""
-
-# use '| ...' to indicate "if omitted, skip to next" logic
-test_expr = (numWords('result') | ...) + ',' + (pp.pyparsing_common.integer('expected') | 'None')
-
-def verify_result(t):
- if '_skipped' in t:
- t['pass'] = False
- elif 'expected' in t:
- t['pass'] = t.result == t.expected
-test_expr.addParseAction(verify_result)
-
-test_expr.runTests(tests)
+# wordsToNum.py +# Copyright 2006, Paul McGuire +# +# Sample parser grammar to read a number given in words, and return the numeric value. +# +import pyparsing as pp +from operator import mul +from functools import reduce + +def makeLit(s, val): + ret = pp.CaselessLiteral(s) + return ret.setParseAction(pp.replaceWith(val)) + +unitDefinitions = [ + ("zero", 0), + ("oh", 0), + ("zip", 0), + ("zilch", 0), + ("nada", 0), + ("bupkis", 0), + ("one", 1), + ("two", 2), + ("three", 3), + ("four", 4), + ("five", 5), + ("six", 6), + ("seven", 7), + ("eight", 8), + ("nine", 9), + ("ten", 10), + ("eleven", 11), + ("twelve", 12), + ("thirteen", 13), + ("fourteen", 14), + ("fifteen", 15), + ("sixteen", 16), + ("seventeen", 17), + ("eighteen", 18), + ("nineteen", 19), + ] +units = pp.MatchFirst(makeLit(s,v) for s,v in sorted(unitDefinitions, key=lambda d: -len(d[0]))) + +tensDefinitions = [ + ("ten", 10), + ("twenty", 20), + ("thirty", 30), + ("forty", 40), + ("fourty", 40), # for the spelling-challenged... 
+ ("fifty", 50), + ("sixty", 60), + ("seventy", 70), + ("eighty", 80), + ("ninety", 90), + ] +tens = pp.MatchFirst(makeLit(s,v) for s,v in tensDefinitions) + +hundreds = makeLit("hundred", 100) + +majorDefinitions = [ + ("thousand", int(1e3)), + ("million", int(1e6)), + ("billion", int(1e9)), + ("trillion", int(1e12)), + ("quadrillion", int(1e15)), + ("quintillion", int(1e18)), + ] +mag = pp.MatchFirst(makeLit(s,v) for s,v in majorDefinitions) + +wordprod = lambda t: reduce(mul,t) +numPart = ((((units + pp.Optional(hundreds)).setParseAction(wordprod) + + pp.Optional(tens) + ).setParseAction(sum) + ^ tens) + + pp.Optional(units) + ).setParseAction(sum) +numWords = ((numPart + pp.Optional(mag)).setParseAction(wordprod)[1, ...]).setParseAction(sum) +numWords.setName("num word parser") + +numWords.ignore(pp.Literal("-")) +numWords.ignore(pp.CaselessLiteral("and")) + +tests = """ + one hundred twenty hundred, None + one hundred and twennty, None + one hundred and twenty, 120 + one hundred and three, 103 + one hundred twenty-three, 123 + one hundred and twenty three, 123 + one hundred twenty three million, 123000000 + one hundred and twenty three million, 123000000 + one hundred twenty three million and three, 123000003 + fifteen hundred and sixty five, 1565 + seventy-seven thousand eight hundred and nineteen, 77819 + seven hundred seventy-seven thousand seven hundred and seventy-seven, 777777 + zero, 0 + forty two, 42 + fourty two, 42 +""" + +# use '| ...' to indicate "if omitted, skip to next" logic +test_expr = (numWords('result') | ...) + ',' + (pp.pyparsing_common.integer('expected') | 'None') + +def verify_result(t): + if '_skipped' in t: + t['pass'] = False + elif 'expected' in t: + t['pass'] = t.result == t.expected +test_expr.addParseAction(verify_result) + +test_expr.runTests(tests) |