diff options
author | ptmcg <ptmcg@austin.rr.com> | 2021-12-03 05:28:22 -0600 |
---|---|---|
committer | ptmcg <ptmcg@austin.rr.com> | 2021-12-06 09:47:22 -0600 |
commit | 1c75c556f7b1ff5470a4d16f4ae05a7c20f89f31 (patch) | |
tree | 3240e8c389f355064bae22eb2f789dde24bc7447 | |
parent | 514041e01688524332987314892f3110c3f9e614 (diff) | |
download | pyparsing-git-1c75c556f7b1ff5470a4d16f4ae05a7c20f89f31.tar.gz |
Minor enhancement to Word generation of its internal regular expression: a range made up of just 2 consecutive characters is now emitted as the two characters themselves (e.g. "ab"), not as a range ("a-b")
-rw-r--r-- | CHANGES | 3 | ||||
-rw-r--r-- | pyparsing/__init__.py | 2 | ||||
-rw-r--r-- | pyparsing/util.py | 5 | ||||
-rw-r--r-- | tests/test_unit.py | 51 |
4 files changed, 38 insertions, 23 deletions
@@ -38,6 +38,9 @@ Version 3.0.7 - In previous versions, the second call to `bool()` would return `False`. +- Minor enhancement to Word generation of internal regular expression, to + emit consecutive characters in range, such as "ab", as "ab", not "a-b". + Version 3.0.6 - --------------- diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py index e152fdd..e83ab52 100644 --- a/pyparsing/__init__.py +++ b/pyparsing/__init__.py @@ -126,7 +126,7 @@ class version_info(NamedTuple): __version_info__ = version_info(3, 0, 7, "final", 0) -__version_time__ = "13 Nov 2021 18:44 UTC" +__version_time__ = "03 Dec 2021 11:21 UTC" __version__ = __version_info__.__version__ __versionTime__ = __version_time__ __author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>" diff --git a/pyparsing/util.py b/pyparsing/util.py index 1309ad6..34ce092 100644 --- a/pyparsing/util.py +++ b/pyparsing/util.py @@ -213,9 +213,10 @@ def _collapse_string_to_ranges( if first == last: ret.append(escape_re_range_char(first)) else: + sep = "" if ord(last) == ord(first) + 1 else "-" ret.append( - "{}-{}".format( - escape_re_range_char(first), escape_re_range_char(last) + "{}{}{}".format( + escape_re_range_char(first), sep, escape_re_range_char(last) ) ) else: diff --git a/tests/test_unit.py b/tests/test_unit.py index dbfdfe5..95183a3 100644 --- a/tests/test_unit.py +++ b/tests/test_unit.py @@ -8177,28 +8177,39 @@ class Test02_WithoutPackrat(ppt.TestParseResultsAsserts, TestCase): "__diag__.{} not set to True".format(diag_name), ) - def testWordInternalReRangesKnownSets(self): - self.assertEqual( - "[!-~]+", - pp.Word(pp.printables).reString, - "failed to generate correct internal re", - ) - self.assertEqual( - "[0-9A-Za-z]+", - pp.Word(pp.alphanums).reString, - "failed to generate correct internal re", - ) - self.assertEqual( - "[!-~¡-ÿ]+", - pp.Word(pp.pyparsing_unicode.Latin1.printables).reString, - "failed to generate correct internal re", - ) - self.assertEqual( - "[À-ÖØ-öø-ÿ]+", - 
pp.Word(pp.alphas8bit).reString, - "failed to generate correct internal re", + def testWordInternalReRangeWithConsecutiveChars(self): + self.assertParseAndCheckList( + pp.Word("ABCDEMNXYZ"), + "ABCDEMNXYZABCDEMNXYZABCDEMNXYZ", + ["ABCDEMNXYZABCDEMNXYZABCDEMNXYZ"] ) + def testWordInternalReRangesKnownSet(self): + tests = [ + ("ABCDEMNXYZ", "[A-EMNX-Z]+"), + (pp.printables, "[!-~]+"), + (pp.alphanums, "[0-9A-Za-z]+"), + (pp.pyparsing_unicode.Latin1.printables, "[!-~¡-ÿ]+"), + (pp.pyparsing_unicode.Latin1.alphanums, "[0-9A-Za-zª²³µ¹ºÀ-ÖØ-öø-ÿ]+"), + (pp.alphas8bit, "[À-ÖØ-öø-ÿ]+"), + ] + failed = [] + for word_string, expected_re in tests: + try: + msg = "failed to generate correct internal re for {!r}".format(word_string) + resultant_re = pp.Word(word_string).reString + self.assertEqual( + expected_re, + resultant_re, + msg + "; expected {!r} got {!r}".format(expected_re, resultant_re) + ) + except AssertionError: + failed.append(msg) + + if failed: + print("Errors:\n{}".format("\n".join(failed))) + self.fail("failed to generate correct internal re's") + def testWordInternalReRanges(self): import random |