summary | refs | log | tree | commit | diff
path: root/pyparsing/helpers.py
diff options
context:
space:
mode:
author: ptmcg <ptmcg@austin.rr.com> 2021-10-26 15:53:53 -0500
committer: ptmcg <ptmcg@austin.rr.com> 2021-10-26 15:53:53 -0500
commit: ddfcd6b656b8ee11517126348fa17d26332cc6cc (patch)
tree: 5642f49e0e2098eaaf30c39297444172cf6c1b11 /pyparsing/helpers.py
parent: ac7d5a9f3c7a4ff5f617f7488944aaad63c4e47f (diff)
download: pyparsing-git-ddfcd6b656b8ee11517126348fa17d26332cc6cc.tar.gz
Fix one_of to generate regex internally, even if caseless or as_keyword given as True
Diffstat (limited to 'pyparsing/helpers.py')
-rw-r--r-- pyparsing/helpers.py 38
1 file changed, 26 insertions, 12 deletions
diff --git a/pyparsing/helpers.py b/pyparsing/helpers.py
index 2d112de..0dde451 100644
--- a/pyparsing/helpers.py
+++ b/pyparsing/helpers.py
@@ -1,5 +1,6 @@
# helpers.py
import html.entities
+import re
from . import __diag__
from .core import *
@@ -253,9 +254,9 @@ def one_of(
if not symbols:
return NoMatch()
- if not asKeyword:
- # if not producing keywords, need to reorder to take care to avoid masking
- # longer choices with shorter ones
+ # reorder given symbols to take care to avoid masking longer choices with shorter ones
+ # (but only if the given symbols are not just single characters)
+ if any(len(sym) > 1 for sym in symbols):
i = 0
while i < len(symbols) - 1:
cur = symbols[i]
@@ -270,17 +271,30 @@ def one_of(
else:
i += 1
- if not (caseless or asKeyword) and useRegex:
- # ~ print(strs, "->", "|".join([_escapeRegexChars(sym) for sym in symbols]))
+ if useRegex:
+ re_flags: int = re.IGNORECASE if caseless else 0
+
try:
- if len(symbols) == len("".join(symbols)):
- return Regex(
- "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols)
- ).set_name(" | ".join(symbols))
+ if all(len(sym) == 1 for sym in symbols):
+ # symbols are just single characters, create range regex pattern
+ patt = "[{}]".format("".join(_escapeRegexRangeChars(sym) for sym in symbols))
else:
- return Regex("|".join(re.escape(sym) for sym in symbols)).set_name(
- " | ".join(symbols)
- )
+ patt = "|".join(re.escape(sym) for sym in symbols)
+
+ # wrap with \b word break markers if defining as keywords
+ if asKeyword:
+ patt = r"\b(:?{})\b".format(patt)
+
+ ret = Regex(patt, flags=re_flags).set_name(" | ".join(symbols))
+
+ if caseless:
+ # add parse action to return symbols as specified, not in random
+ # casing as found in input string
+ symbol_map = {sym.lower(): sym for sym in symbols}
+ ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()])
+
+ return ret
+
except sre_constants.error:
warnings.warn(
"Exception creating Regex for one_of, building MatchFirst", stacklevel=2