diff options
author | ptmcg <ptmcg@austin.rr.com> | 2021-10-26 15:53:53 -0500 |
---|---|---|
committer | ptmcg <ptmcg@austin.rr.com> | 2021-10-26 15:53:53 -0500 |
commit | ddfcd6b656b8ee11517126348fa17d26332cc6cc (patch) | |
tree | 5642f49e0e2098eaaf30c39297444172cf6c1b11 /pyparsing/helpers.py | |
parent | ac7d5a9f3c7a4ff5f617f7488944aaad63c4e47f (diff) | |
download | pyparsing-git-ddfcd6b656b8ee11517126348fa17d26332cc6cc.tar.gz |
Fix one_of to generate regex internally, even if caseless or as_keyword given as True
Diffstat (limited to 'pyparsing/helpers.py')
-rw-r--r-- | pyparsing/helpers.py | 38 |
1 files changed, 26 insertions, 12 deletions
diff --git a/pyparsing/helpers.py b/pyparsing/helpers.py
index 2d112de..0dde451 100644
--- a/pyparsing/helpers.py
+++ b/pyparsing/helpers.py
@@ -1,5 +1,6 @@
 # helpers.py
 import html.entities
+import re
 
 from . import __diag__
 from .core import *
@@ -253,9 +254,9 @@ def one_of(
     if not symbols:
         return NoMatch()
 
-    if not asKeyword:
-        # if not producing keywords, need to reorder to take care to avoid masking
-        # longer choices with shorter ones
+    # reorder given symbols to take care to avoid masking longer choices with shorter ones
+    # (but only if the given symbols are not just single characters)
+    if any(len(sym) > 1 for sym in symbols):
         i = 0
         while i < len(symbols) - 1:
             cur = symbols[i]
@@ -270,17 +271,30 @@ def one_of(
             else:
                 i += 1
 
-    if not (caseless or asKeyword) and useRegex:
-        # ~ print(strs, "->", "|".join([_escapeRegexChars(sym) for sym in symbols]))
+    if useRegex:
+        re_flags: int = re.IGNORECASE if caseless else 0
+
         try:
-            if len(symbols) == len("".join(symbols)):
-                return Regex(
-                    "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols)
-                ).set_name(" | ".join(symbols))
+            if all(len(sym) == 1 for sym in symbols):
+                # symbols are just single characters, create range regex pattern
+                patt = "[{}]".format("".join(_escapeRegexRangeChars(sym) for sym in symbols))
             else:
-                return Regex("|".join(re.escape(sym) for sym in symbols)).set_name(
-                    " | ".join(symbols)
-                )
+                patt = "|".join(re.escape(sym) for sym in symbols)
+
+            # wrap with \b word break markers if defining as keywords
+            if asKeyword:
+                patt = r"\b(:?{})\b".format(patt)
+
+            ret = Regex(patt, flags=re_flags).set_name(" | ".join(symbols))
+
+            if caseless:
+                # add parse action to return symbols as specified, not in random
+                # casing as found in input string
+                symbol_map = {sym.lower(): sym for sym in symbols}
+                ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()])
+
+            return ret
+
         except sre_constants.error:
             warnings.warn(
                 "Exception creating Regex for one_of, building MatchFirst", stacklevel=2
|