summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ptmcg <ptmcg@austin.rr.com> 2021-12-03 05:28:22 -0600
committer ptmcg <ptmcg@austin.rr.com> 2021-12-06 09:47:22 -0600
commit 1c75c556f7b1ff5470a4d16f4ae05a7c20f89f31 (patch)
tree 3240e8c389f355064bae22eb2f789dde24bc7447
parent 514041e01688524332987314892f3110c3f9e614 (diff)
download pyparsing-git-1c75c556f7b1ff5470a4d16f4ae05a7c20f89f31.tar.gz
Minor enhancement to Word generation of internal regular expression when characters are 2 consecutive chars
-rw-r--r-- CHANGES 3
-rw-r--r-- pyparsing/__init__.py 2
-rw-r--r-- pyparsing/util.py 5
-rw-r--r-- tests/test_unit.py 51
4 files changed, 38 insertions, 23 deletions
diff --git a/CHANGES b/CHANGES
index bbf643d..48e8fba 100644
--- a/CHANGES
+++ b/CHANGES
@@ -38,6 +38,9 @@ Version 3.0.7 -
In previous versions, the second call to `bool()` would return `False`.
+- Minor enhancement to Word generation of internal regular expression, to
+ emit consecutive characters in range, such as "ab", as "ab", not "a-b".
+
Version 3.0.6 -
---------------
diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py
index e152fdd..e83ab52 100644
--- a/pyparsing/__init__.py
+++ b/pyparsing/__init__.py
@@ -126,7 +126,7 @@ class version_info(NamedTuple):
__version_info__ = version_info(3, 0, 7, "final", 0)
-__version_time__ = "13 Nov 2021 18:44 UTC"
+__version_time__ = "03 Dec 2021 11:21 UTC"
__version__ = __version_info__.__version__
__versionTime__ = __version_time__
__author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"
diff --git a/pyparsing/util.py b/pyparsing/util.py
index 1309ad6..34ce092 100644
--- a/pyparsing/util.py
+++ b/pyparsing/util.py
@@ -213,9 +213,10 @@ def _collapse_string_to_ranges(
if first == last:
ret.append(escape_re_range_char(first))
else:
+ sep = "" if ord(last) == ord(first) + 1 else "-"
ret.append(
- "{}-{}".format(
- escape_re_range_char(first), escape_re_range_char(last)
+ "{}{}{}".format(
+ escape_re_range_char(first), sep, escape_re_range_char(last)
)
)
else:
diff --git a/tests/test_unit.py b/tests/test_unit.py
index dbfdfe5..95183a3 100644
--- a/tests/test_unit.py
+++ b/tests/test_unit.py
@@ -8177,28 +8177,39 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase):
"__diag__.{} not set to True".format(diag_name),
)
- def testWordInternalReRangesKnownSets(self):
- self.assertEqual(
- "[!-~]+",
- pp.Word(pp.printables).reString,
- "failed to generate correct internal re",
- )
- self.assertEqual(
- "[0-9A-Za-z]+",
- pp.Word(pp.alphanums).reString,
- "failed to generate correct internal re",
- )
- self.assertEqual(
- "[!-~¡-ÿ]+",
- pp.Word(pp.pyparsing_unicode.Latin1.printables).reString,
- "failed to generate correct internal re",
- )
- self.assertEqual(
- "[À-ÖØ-öø-ÿ]+",
- pp.Word(pp.alphas8bit).reString,
- "failed to generate correct internal re",
+ def testWordInternalReRangeWithConsecutiveChars(self):
+ self.assertParseAndCheckList(
+ pp.Word("ABCDEMNXYZ"),
+ "ABCDEMNXYZABCDEMNXYZABCDEMNXYZ",
+ ["ABCDEMNXYZABCDEMNXYZABCDEMNXYZ"]
)
+ def testWordInternalReRangesKnownSet(self):
+ tests = [
+ ("ABCDEMNXYZ", "[A-EMNX-Z]+"),
+ (pp.printables, "[!-~]+"),
+ (pp.alphanums, "[0-9A-Za-z]+"),
+ (pp.pyparsing_unicode.Latin1.printables, "[!-~¡-ÿ]+"),
+ (pp.pyparsing_unicode.Latin1.alphanums, "[0-9A-Za-zª²³µ¹ºÀ-ÖØ-öø-ÿ]+"),
+ (pp.alphas8bit, "[À-ÖØ-öø-ÿ]+"),
+ ]
+ failed = []
+ for word_string, expected_re in tests:
+ try:
+ msg = "failed to generate correct internal re for {!r}".format(word_string)
+ resultant_re = pp.Word(word_string).reString
+ self.assertEqual(
+ expected_re,
+ resultant_re,
+ msg + "; expected {!r} got {!r}".format(expected_re, resultant_re)
+ )
+ except AssertionError:
+ failed.append(msg)
+
+ if failed:
+ print("Errors:\n{}".format("\n".join(failed)))
+ self.fail("failed to generate correct internal re's")
+
def testWordInternalReRanges(self):
import random