examples/wordsToNum.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126

# wordsToNum.py
# Copyright 2006, Paul McGuire
#
# Sample parser grammar to read a number given in words, and return the numeric value.
#
import pyparsing as pp
from operator import mul
from functools import reduce


def makeLit(s, val):
    """Return a case-insensitive literal for `s` whose token is replaced by `val`.

    s   -- the word to match (matched without regard to case)
    val -- the value emitted in the parse results instead of the matched text
    """
    literal = pp.CaselessLiteral(s)
    # setParseAction configures the element in place and hands it back
    literal.setParseAction(pp.replaceWith(val))
    return literal


# (word, value) pairs for 0 through 19: several spellings of zero first,
# followed by "one" .. "nineteen" in numeric order.
_zero_words = ("zero", "oh", "zip", "zilch", "nada", "bupkis")
_one_to_nineteen = (
    "one two three four five six seven eight nine ten "
    "eleven twelve thirteen fourteen fifteen sixteen seventeen eighteen nineteen"
).split()
unitDefinitions = [(word, 0) for word in _zero_words]
unitDefinitions += [
    (word, value) for value, word in enumerate(_one_to_nineteen, start=1)
]
# MatchFirst takes the first alternative that matches, so list the longer
# words first (e.g. "seventeen" ahead of "seven").
_units_longest_first = sorted(
    unitDefinitions, key=lambda d: len(d[0]), reverse=True
)
units = pp.MatchFirst(
    [makeLit(word, value) for word, value in _units_longest_first]
)

# (word, value) pairs for the multiples of ten, 10 through 90.
_tens_words = (
    "ten",
    "twenty",
    "thirty",
    "forty",
    "fifty",
    "sixty",
    "seventy",
    "eighty",
    "ninety",
)
tensDefinitions = [
    (word, 10 * multiple) for multiple, word in enumerate(_tens_words, start=1)
]
# for the spelling-challenged: also accept "fourty", right after "forty"
tensDefinitions.insert(4, ("fourty", 40))
# any one of the multiples-of-ten words, each replaced by its numeric value
tens = pp.MatchFirst([makeLit(*definition) for definition in tensDefinitions])

# the word "hundred" (any case), emitted as the multiplier 100
hundreds = pp.CaselessLiteral("hundred").setParseAction(pp.replaceWith(100))

# (word, value) pairs for the large magnitudes; each one is the next power
# of 1000, from thousand (1000**1) up to quintillion (1000**6).
_magnitude_words = (
    "thousand",
    "million",
    "billion",
    "trillion",
    "quadrillion",
    "quintillion",
)
majorDefinitions = [
    (word, 1000 ** power) for power, word in enumerate(_magnitude_words, start=1)
]
# any one of the magnitude words, each replaced by its numeric value
mag = pp.MatchFirst([makeLit(*definition) for definition in majorDefinitions])

def wordprod(t):
    """Return the product of all the values in the token sequence `t`."""
    return reduce(mul, t)
# A units word optionally followed by "hundred"; the values are multiplied,
# so "three hundred" gives 300 and a bare "three" stays 3.
_units_times_hundred = (units + pp.Optional(hundreds)).setParseAction(wordprod)

# ... optionally followed by a tens word, the values being added together.
_hundreds_then_tens = (_units_times_hundred + pp.Optional(tens)).setParseAction(sum)

# One group of number words: either the form above or a bare tens word
# ('^' keeps whichever alternative matches more text), then an optional
# trailing units word; everything matched is summed.
numPart = (
    (_hundreds_then_tens ^ tens) + pp.Optional(units)
).setParseAction(sum)

# A group optionally scaled by a magnitude word ("thousand", "million", ...).
_scaled_group = (numPart + pp.Optional(mag)).setParseAction(wordprod)

# A complete number is one or more scaled groups added together.
numWords = _scaled_group[1, ...].setParseAction(sum)
numWords.setName("num word parser")

# hyphens and the word "and" (any case) are skipped while matching number words
for _filler in (pp.Literal("-"), pp.CaselessLiteral("and")):
    numWords.ignore(_filler)

# Sample lines fed to runTests, one test per line, each written as
# "<number in words>, <expected>". <expected> is either the integer the
# words should evaluate to, or the literal None on lines where no integer
# is given (presumably inputs that should not parse as a number).
tests = """
    one hundred twenty hundred, None
    one hundred and twennty, None
    one hundred and twenty, 120
    one hundred and three, 103
    one hundred twenty-three, 123
    one hundred and twenty three, 123
    one hundred twenty three million, 123000000
    one hundred and twenty three million, 123000000
    one hundred twenty three million and three, 123000003
    fifteen hundred and sixty five, 1565
    seventy-seven thousand eight hundred and nineteen, 77819
    seven hundred seventy-seven thousand seven hundred and seventy-seven, 777777
    zero, 0
    forty two, 42
    fourty two, 42
"""

# Grammar for one test line: number words, a comma, then the expected value.
# '| ...' indicates "if omitted, skip to next" logic for the number words.
_words_or_skip = numWords("result") | ...
_expected_value = pp.pyparsing_common.integer("expected") | "None"
test_expr = _words_or_skip + "," + _expected_value


def verify_result(t):
    """Parse action recording whether a test line met its stated expectation.

    Stores the verdict in ``t["pass"]``:

    - When an expected integer was given ("expected" is present), the line
      passes only if nothing had to be skipped while reading the number
      words and the parsed ``result`` equals ``expected``.
    - When no integer was given (the line ended in ``None``), the line is a
      negative test: it passes only if some text had to be skipped, i.e.
      the number words could not be read cleanly.

    t -- the parse results for one test line; modified in place.
    """
    fully_parsed = "_skipped" not in t
    if "expected" in t:
        t["pass"] = fully_parsed and t.result == t.expected
    else:
        # negative test: success means the input was NOT accepted as a number
        t["pass"] = not fully_parsed


# attach the pass/fail check to each parsed test line, then run all samples
test_expr.addParseAction(verify_result).runTests(tests)