summaryrefslogtreecommitdiff
path: root/examples/chemical_formulas.py
diff options
context:
space:
mode:
Diffstat (limited to 'examples/chemical_formulas.py')
-rw-r--r--examples/chemical_formulas.py119
1 files changed, 119 insertions, 0 deletions
diff --git a/examples/chemical_formulas.py b/examples/chemical_formulas.py
new file mode 100644
index 0000000..16d4bb4
--- /dev/null
+++ b/examples/chemical_formulas.py
@@ -0,0 +1,119 @@
+#
+# chemicalFormulas.py
+#
+# Copyright (c) 2003,2019 Paul McGuire
+#
+
+import pyparsing as pp
+
+atomic_weight = {
+ "O": 15.9994,
+ "H": 1.00794,
+ "Na": 22.9897,
+ "Cl": 35.4527,
+ "C": 12.0107,
+}
+
+digits = "0123456789"
+
+# Version 1
+element = pp.Word(pp.alphas.upper(), pp.alphas.lower(), max=2).set_name("element")
+# for stricter matching, use this Regex instead
+# element = Regex("A[cglmrstu]|B[aehikr]?|C[adeflmorsu]?|D[bsy]|"
+# "E[rsu]|F[emr]?|G[ade]|H[efgos]?|I[nr]?|Kr?|L[airu]|"
+# "M[dgnot]|N[abdeiop]?|Os?|P[abdmortu]?|R[abefghnu]|"
+# "S[bcegimnr]?|T[abcehilm]|U(u[bhopqst])?|V|W|Xe|Yb?|Z[nr]")
+element_ref = pp.Group(element + pp.Opt(pp.Word(digits), default="1"))
+formula = element_ref[...]
+
+
+def sum_atomic_weights(element_list):
+ return sum(atomic_weight[elem] * int(qty) for elem, qty in element_list)
+
+
+formula.run_tests(
+ """\
+ H2O
+ C6H5OH
+ NaCl
+ """,
+ full_dump=False,
+ post_parse=lambda _, tokens: f"Molecular weight: {sum_atomic_weights(tokens)}",
+)
+print()
+
+
+# Version 2 - access parsed items by results name
+element_ref = pp.Group(
+ element("symbol") + pp.Opt(pp.Word(digits), default="1")("qty")
+)
+formula = element_ref[...]
+
+
+def sum_atomic_weights_by_results_name(element_list):
+ return sum(atomic_weight[elem.symbol] * int(elem.qty) for elem in element_list)
+
+
+formula.run_tests(
+ """\
+ H2O
+ C6H5OH
+ NaCl
+ """,
+ full_dump=False,
+ post_parse=lambda _, tokens:
+ f"Molecular weight: {sum_atomic_weights_by_results_name(tokens)}",
+)
+print()
+
+# Version 3 - convert integers during parsing process
+integer = pp.Word(digits).set_name("integer")
+integer.add_parse_action(lambda t: int(t[0]))
+element_ref = pp.Group(element("symbol") + pp.Opt(integer, default=1)("qty"))
+formula = element_ref[...].set_name("chemical_formula")
+
+
+def sum_atomic_weights_by_results_name_with_converted_ints(element_list):
+ return sum(atomic_weight[elem.symbol] * int(elem.qty) for elem in element_list)
+
+
+formula.run_tests(
+ """\
+ H2O
+ C6H5OH
+ NaCl
+ """,
+ full_dump=False,
+ post_parse=lambda _, tokens:
+ f"Molecular weight: {sum_atomic_weights_by_results_name_with_converted_ints(tokens)}",
+)
+print()
+
+# Version 4 - parse and convert integers as subscript digits
+subscript_digits = "₀₁₂₃₄₅₆₇₈₉"
+subscript_int_map = {e[1]: e[0] for e in enumerate(subscript_digits)}
+
+
+def cvt_subscript_int(s):
+ ret = 0
+ for c in s[0]:
+ ret = ret * 10 + subscript_int_map[c]
+ return ret
+
+
+subscript_int = pp.Word(subscript_digits).set_name("subscript")
+subscript_int.add_parse_action(cvt_subscript_int)
+
+element_ref = pp.Group(element("symbol") + pp.Opt(subscript_int, default=1)("qty"))
+formula = element_ref[1, ...].set_name("chemical_formula")
+formula.run_tests(
+ """\
+ H₂O
+ C₆H₅OH
+ NaCl
+ """,
+ full_dump=False,
+ post_parse=lambda _, tokens:
+ f"Molecular weight: {sum_atomic_weights_by_results_name_with_converted_ints(tokens)}",
+)
+print()