Fix type annotation for ranges in unicode_sets; make _get_chars_for_ranges a lazyclassproperty

author: ptmcg <ptmcg@austin.rr.com> 2021-09-27 05:47:27 -0500
committer: ptmcg <ptmcg@austin.rr.com> 2021-09-27 05:47:27 -0500
commit: 4e306edc905c8596436220f72dd2ce04b274bb29 (patch)
tree: 9eecdafef340bf8e75d823939b9554b550b513bb
parent: d9b2b46f98e262661a2236b62f89bbadb42edda6 (diff)
download: pyparsing-git-4e306edc905c8596436220f72dd2ce04b274bb29.tar.gz
2 files changed, 38 insertions, 28 deletions
diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py
index 45de791..7e2a144 100644
--- a/pyparsing/__init__.py
+++ b/pyparsing/__init__.py
@@ -105,7 +105,7 @@ __version__ = "{}.{}.{}".format(*__version_info__[:3]) + (
     ),
     "",
 )[__version_info__.release_level == "final"]
-__version_time__ = "25 September 2021 17:02 UTC"
+__version_time__ = "27 September 2021 10:38 UTC"
 __versionTime__ = __version_time__
 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
 
@@ -119,7 +119,7 @@ from .core import _builtin_exprs as core_builtin_exprs
 from .helpers import *
 from .helpers import _builtin_exprs as helper_builtin_exprs
 
-from .unicode import unicode_set, pyparsing_unicode as unicode
+from .unicode import unicode_set, UnicodeRangeList, pyparsing_unicode as unicode
 from .testing import pyparsing_test as testing
 from .common import (
     pyparsing_common as common,
diff --git a/pyparsing/unicode.py b/pyparsing/unicode.py
index cbf6865..9ee6710 100644
--- a/pyparsing/unicode.py
+++ b/pyparsing/unicode.py
@@ -2,7 +2,7 @@
 
 import sys
 from itertools import filterfalse
-from typing import List, Tuple
+from typing import List, Tuple, Union
 
 
 class _lazyclassproperty:
@@ -25,14 +25,24 @@ class _lazyclassproperty:
         return cls._intern[attrname]
 
 
+UnicodeRangeList = List[Union[Tuple[int, int], Tuple[int]]]
+
+
 class unicode_set:
     """
     A set of Unicode characters, for language-specific strings for
     ``alphas``, ``nums``, ``alphanums``, and ``printables``.
     A unicode_set is defined by a list of ranges in the Unicode character
-    set, in a class attribute ``_ranges``, such as::
+    set, in a class attribute ``_ranges``. Ranges can be specified using
+    2-tuples or a 1-tuple, such as::
 
-        _ranges = [(0x0020, 0x007e), (0x00a0, 0x00ff),]
+        _ranges = [
+            (0x0020, 0x007e),
+            (0x00a0, 0x00ff),
+            (0x0100,),
+            ]
+
+    Ranges are left- and right-inclusive. A 1-tuple of (x,) is treated as (x, x).
 
     A unicode set can also be defined using multiple inheritance of other unicode sets::
 
@@ -40,10 +50,10 @@ class unicode_set:
             pass
     """
 
-    _ranges: List[Tuple[int, ...]] = []
+    _ranges: UnicodeRangeList = []
 
-    @classmethod
-    def _get_chars_for_ranges(cls):
+    @_lazyclassproperty
+    def _chars_for_ranges(cls):
         ret = []
         for cc in cls.__mro__:
             if cc is unicode_set:
@@ -55,17 +65,17 @@ class unicode_set:
     @_lazyclassproperty
     def printables(cls):
         "all non-whitespace characters in this range"
-        return "".join(filterfalse(str.isspace, cls._get_chars_for_ranges()))
+        return "".join(filterfalse(str.isspace, cls._chars_for_ranges))
 
     @_lazyclassproperty
     def alphas(cls):
         "all alphabetic characters in this range"
-        return "".join(filter(str.isalpha, cls._get_chars_for_ranges()))
+        return "".join(filter(str.isalpha, cls._chars_for_ranges))
 
     @_lazyclassproperty
     def nums(cls):
         "all numeric digit characters in this range"
-        return "".join(filter(str.isdigit, cls._get_chars_for_ranges()))
+        return "".join(filter(str.isdigit, cls._chars_for_ranges))
 
     @_lazyclassproperty
     def alphanums(cls):
@@ -76,7 +86,7 @@ class unicode_set:
     def identchars(cls):
         "all characters in this range that are valid identifier characters, plus underscore '_'"
         return (
-            "".join(filter(str.isidentifier, cls._get_chars_for_ranges()))
+            "".join(filter(str.isidentifier, cls._chars_for_ranges))
             + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzªµº"
             + "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ"
             + "_"
@@ -96,30 +106,30 @@ class pyparsing_unicode(unicode_set):
     A namespace class for defining common language unicode_sets.
     """
 
-    _ranges: List[Tuple[int, ...]] = [(32, sys.maxunicode)]
+    _ranges: UnicodeRangeList = [(32, sys.maxunicode)]
 
     class Latin1(unicode_set):
         "Unicode set for Latin-1 Unicode Character Range"
-        _ranges: List[Tuple[int, ...]] = [
+        _ranges: UnicodeRangeList = [
             (0x0020, 0x007E),
             (0x00A0, 0x00FF),
         ]
 
     class LatinA(unicode_set):
         "Unicode set for Latin-A Unicode Character Range"
-        _ranges: List[Tuple[int, ...]] = [
+        _ranges: UnicodeRangeList = [
             (0x0100, 0x017F),
         ]
 
     class LatinB(unicode_set):
         "Unicode set for Latin-B Unicode Character Range"
-        _ranges: List[Tuple[int, ...]] = [
+        _ranges: UnicodeRangeList = [
             (0x0180, 0x024F),
         ]
 
     class Greek(unicode_set):
         "Unicode set for Greek Unicode Character Ranges"
-        _ranges: List[Tuple[int, ...]] = [
+        _ranges: UnicodeRangeList = [
             (0x0342, 0x0345),
             (0x0370, 0x0377),
             (0x037A, 0x037F),
@@ -159,7 +169,7 @@ class pyparsing_unicode(unicode_set):
 
     class Cyrillic(unicode_set):
         "Unicode set for Cyrillic Unicode Character Range"
-        _ranges: List[Tuple[int, ...]] = [
+        _ranges: UnicodeRangeList = [
             (0x0400, 0x052F),
             (0x1C80, 0x1C88),
             (0x1D2B,),
@@ -172,7 +182,7 @@ class pyparsing_unicode(unicode_set):
 
     class Chinese(unicode_set):
         "Unicode set for Chinese Unicode Character Range"
-        _ranges: List[Tuple[int, ...]] = [
+        _ranges: UnicodeRangeList = [
             (0x2E80, 0x2E99),
             (0x2E9B, 0x2EF3),
             (0x31C0, 0x31E3),
@@ -195,18 +205,18 @@ class pyparsing_unicode(unicode_set):
 
     class Japanese(unicode_set):
         "Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges"
-        _ranges: List[Tuple[int, ...]] = []
+        _ranges: UnicodeRangeList = []
 
         class Kanji(unicode_set):
             "Unicode set for Kanji Unicode Character Range"
-            _ranges: List[Tuple[int, ...]] = [
+            _ranges: UnicodeRangeList = [
                 (0x4E00, 0x9FBF),
                 (0x3000, 0x303F),
             ]
 
         class Hiragana(unicode_set):
             "Unicode set for Hiragana Unicode Character Range"
-            _ranges: List[Tuple[int, ...]] = [
+            _ranges: UnicodeRangeList = [
                 (0x3041, 0x3096),
                 (0x3099, 0x30A0),
                 (0x30FC,),
@@ -218,7 +228,7 @@ class pyparsing_unicode(unicode_set):
 
         class Katakana(unicode_set):
             "Unicode set for Katakana  Unicode Character Range"
-            _ranges: List[Tuple[int, ...]] = [
+            _ranges: UnicodeRangeList = [
                 (0x3099, 0x309C),
                 (0x30A0, 0x30FF),
                 (0x31F0, 0x31FF),
@@ -232,7 +242,7 @@ class pyparsing_unicode(unicode_set):
 
     class Hangul(unicode_set):
         "Unicode set for Hangul (Korean) Unicode Character Range"
-        _ranges: List[Tuple[int, ...]] = [
+        _ranges: UnicodeRangeList = [
             (0x1100, 0x11FF),
             (0x302E, 0x302F),
             (0x3131, 0x318E),
@@ -258,11 +268,11 @@ class pyparsing_unicode(unicode_set):
 
     class Thai(unicode_set):
         "Unicode set for Thai Unicode Character Range"
-        _ranges: List[Tuple[int, ...]] = [(0x0E01, 0x0E3A), (0x0E3F, 0x0E5B)]
+        _ranges: UnicodeRangeList = [(0x0E01, 0x0E3A), (0x0E3F, 0x0E5B)]
 
     class Arabic(unicode_set):
         "Unicode set for Arabic Unicode Character Range"
-        _ranges: List[Tuple[int, ...]] = [
+        _ranges: UnicodeRangeList = [
             (0x0600, 0x061B),
             (0x061E, 0x06FF),
             (0x0700, 0x077F),
@@ -270,7 +280,7 @@ class pyparsing_unicode(unicode_set):
 
     class Hebrew(unicode_set):
         "Unicode set for Hebrew Unicode Character Range"
-        _ranges: List[Tuple[int, ...]] = [
+        _ranges: UnicodeRangeList = [
             (0x0591, 0x05C7),
             (0x05D0, 0x05EA),
             (0x05EF, 0x05F4),
@@ -284,7 +294,7 @@ class pyparsing_unicode(unicode_set):
 
     class Devanagari(unicode_set):
         "Unicode set for Devanagari Unicode Character Range"
-        _ranges: List[Tuple[int, ...]] = [(0x0900, 0x097F), (0xA8E0, 0xA8FF)]
+        _ranges: UnicodeRangeList = [(0x0900, 0x097F), (0xA8E0, 0xA8FF)]
 
 
 pyparsing_unicode.Japanese._ranges = (
author	ptmcg <ptmcg@austin.rr.com>	2021-09-27 05:47:27 -0500
committer	ptmcg <ptmcg@austin.rr.com>	2021-09-27 05:47:27 -0500
commit	4e306edc905c8596436220f72dd2ce04b274bb29 (patch)
tree	9eecdafef340bf8e75d823939b9554b550b513bb
parent	d9b2b46f98e262661a2236b62f89bbadb42edda6 (diff)
download	pyparsing-git-4e306edc905c8596436220f72dd2ce04b274bb29.tar.gz