1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
|
# wordsToNum.py
# Copyright 2006, Paul McGuire
#
# Sample parser grammar to read a number given in words, and return the numeric value.
#
import pyparsing as pp
from operator import mul
from functools import reduce
def makeLit(s, val):
    """Build a case-insensitive literal for ``s`` whose token is replaced by ``val``.

    The returned expression matches the text ``s`` regardless of case and, on a
    match, yields ``val`` in place of the matched text.
    """
    return pp.CaselessLiteral(s).setParseAction(pp.replaceWith(val))
# (word, value) pairs for 0 through 19, including several slang words for zero.
unitDefinitions = [
    # every one of these spellings stands for 0
    *((word, 0) for word in ("zero", "oh", "zip", "zilch", "nada", "bupkis")),
    # "one" .. "nineteen": the value is the word's position in the sequence
    *(
        (word, value)
        for value, word in enumerate(
            (
                "one",
                "two",
                "three",
                "four",
                "five",
                "six",
                "seven",
                "eight",
                "nine",
                "ten",
                "eleven",
                "twelve",
                "thirteen",
                "fourteen",
                "fifteen",
                "sixteen",
                "seventeen",
                "eighteen",
                "nineteen",
            ),
            start=1,
        )
    ),
]
# Longest words are tried first: MatchFirst takes the first alternative that
# matches, so "seven" must not get a chance to match the start of "seventeen".
units = pp.MatchFirst(
    makeLit(word, value)
    for word, value in sorted(
        unitDefinitions, key=lambda entry: len(entry[0]), reverse=True
    )
)
# (word, value) pairs for the multiples of ten.
# "fourty" is accepted alongside "forty" for the spelling-challenged.
tensDefinitions = list(
    zip(
        (
            "ten",
            "twenty",
            "thirty",
            "forty",
            "fourty",
            "fifty",
            "sixty",
            "seventy",
            "eighty",
            "ninety",
        ),
        (10, 20, 30, 40, 40, 50, 60, 70, 80, 90),
    )
)
# Alternatives are tried in table order; no tens word is a prefix of another,
# so no length-based reordering is needed here.
tens = pp.MatchFirst([makeLit(word, value) for word, value in tensDefinitions])

# The single word that multiplies a preceding units word by one hundred.
hundreds = makeLit(s="hundred", val=100)
# Magnitude words: each successive name is another factor of one thousand.
majorDefinitions = [
    (name, 1000 ** power)
    for power, name in enumerate(
        (
            "thousand",
            "million",
            "billion",
            "trillion",
            "quadrillion",
            "quintillion",
        ),
        start=1,
    )
]
# Any one magnitude word ("thousand", "million", ...), yielding its multiplier.
mag = pp.MatchFirst([makeLit(name, factor) for name, factor in majorDefinitions])
def wordprod(t):
    """Return the product of all values in ``t``.

    Used as a parse action to combine a number with its multiplier, e.g. the
    tokens ``[15, 100]`` for "fifteen hundred" become ``1500``.

    ``t`` must contain at least one item; a single item is returned unchanged.
    """
    return reduce(mul, t)
# <units> [hundred] -> multiplied together, e.g. "fifteen hundred" = 15 * 100.
_hundreds_part = (units + pp.Optional(hundreds)).setParseAction(wordprod)

# ... optionally followed by a tens word, which is added on.
_hundreds_and_tens = (_hundreds_part + pp.Optional(tens)).setParseAction(sum)

# Either the form above or a bare tens word ('^' builds an Or of the two),
# then an optional trailing units word; the pieces are summed.
numPart = ((_hundreds_and_tens ^ tens) + pp.Optional(units)).setParseAction(sum)

# One or more parts, each optionally scaled by a magnitude word, added together.
_scaled_part = (numPart + pp.Optional(mag)).setParseAction(wordprod)
numWords = _scaled_part[1, ...].setParseAction(sum)
numWords.setName("num word parser")

# Hyphens and the word "and" are skipped wherever they appear between words.
numWords.ignore(pp.Literal("-"))
numWords.ignore(pp.CaselessLiteral("and"))
# Test cases, one per line: number words, a comma, then the expected integer
# value or the literal None.
tests = """
one hundred twenty hundred, None
one hundred and twennty, None
one hundred and twenty, 120
one hundred and three, 103
one hundred twenty-three, 123
one hundred and twenty three, 123
one hundred twenty three million, 123000000
one hundred and twenty three million, 123000000
one hundred twenty three million and three, 123000003
fifteen hundred and sixty five, 1565
seventy-seven thousand eight hundred and nineteen, 77819
seven hundred seventy-seven thousand seven hundred and seventy-seven, 777777
zero, 0
forty two, 42
fourty two, 42
"""
# Grammar for one test line: <number words> , <expected integer | None>
# use '| ...' to indicate "if omitted, skip to next" logic
_words_or_skip = numWords("result") | ...
_expected_value = pp.pyparsing_common.integer("expected") | "None"
test_expr = _words_or_skip + "," + _expected_value
def verify_result(t):
    """Record in ``t['pass']`` whether a parsed test line met its expectation.

    A line whose number words were skipped is marked as failed.  Otherwise, if
    an expected integer was parsed, ``t['pass']`` says whether the parsed
    result equals it.  When neither applies, ``t`` is left without a 'pass'
    entry.
    """
    if "_skipped" in t:
        t["pass"] = False
        return
    if "expected" in t:
        t["pass"] = t.result == t.expected
# Attach the pass/fail check to the test-line grammar, then run every case.
test_expr.addParseAction(verify_result).runTests(tests)
|