diff options
| author | ptmcg <ptmcg@austin.rr.com> | 2021-09-27 05:47:27 -0500 |
|---|---|---|
| committer | ptmcg <ptmcg@austin.rr.com> | 2021-09-27 05:47:27 -0500 |
| commit | 4e306edc905c8596436220f72dd2ce04b274bb29 (patch) | |
| tree | 9eecdafef340bf8e75d823939b9554b550b513bb | |
| parent | d9b2b46f98e262661a2236b62f89bbadb42edda6 (diff) | |
| download | pyparsing-git-4e306edc905c8596436220f72dd2ce04b274bb29.tar.gz | |
Fix type annotation for ranges in unicode_sets; make _get_chars_for_ranges a lazyclassproperty
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | pyparsing/__init__.py | 4 |
| -rw-r--r-- | pyparsing/unicode.py | 62 |
2 files changed, 38 insertions, 28 deletions
diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py index 45de791..7e2a144 100644 --- a/pyparsing/__init__.py +++ b/pyparsing/__init__.py @@ -105,7 +105,7 @@ __version__ = "{}.{}.{}".format(*__version_info__[:3]) + ( ), "", )[__version_info__.release_level == "final"] -__version_time__ = "25 September 2021 17:02 UTC" +__version_time__ = "27 September 2021 10:38 UTC" __versionTime__ = __version_time__ __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" @@ -119,7 +119,7 @@ from .core import _builtin_exprs as core_builtin_exprs from .helpers import * from .helpers import _builtin_exprs as helper_builtin_exprs -from .unicode import unicode_set, pyparsing_unicode as unicode +from .unicode import unicode_set, UnicodeRangeList, pyparsing_unicode as unicode from .testing import pyparsing_test as testing from .common import ( pyparsing_common as common, diff --git a/pyparsing/unicode.py b/pyparsing/unicode.py index cbf6865..9ee6710 100644 --- a/pyparsing/unicode.py +++ b/pyparsing/unicode.py @@ -2,7 +2,7 @@ import sys from itertools import filterfalse -from typing import List, Tuple +from typing import List, Tuple, Union class _lazyclassproperty: @@ -25,14 +25,24 @@ class _lazyclassproperty: return cls._intern[attrname] +UnicodeRangeList = List[Union[Tuple[int, int], Tuple[int]]] + + class unicode_set: """ A set of Unicode characters, for language-specific strings for ``alphas``, ``nums``, ``alphanums``, and ``printables``. A unicode_set is defined by a list of ranges in the Unicode character - set, in a class attribute ``_ranges``, such as:: + set, in a class attribute ``_ranges``. Ranges can be specified using + 2-tuples or a 1-tuple, such as:: - _ranges = [(0x0020, 0x007e), (0x00a0, 0x00ff),] + _ranges = [ + (0x0020, 0x007e), + (0x00a0, 0x00ff), + (0x0100,), + ] + + Ranges are left- and right-inclusive. A 1-tuple of (x,) is treated as (x, x). 
A unicode set can also be defined using multiple inheritance of other unicode sets:: @@ -40,10 +50,10 @@ class unicode_set: pass """ - _ranges: List[Tuple[int, ...]] = [] + _ranges: UnicodeRangeList = [] - @classmethod - def _get_chars_for_ranges(cls): + @_lazyclassproperty + def _chars_for_ranges(cls): ret = [] for cc in cls.__mro__: if cc is unicode_set: @@ -55,17 +65,17 @@ class unicode_set: @_lazyclassproperty def printables(cls): "all non-whitespace characters in this range" - return "".join(filterfalse(str.isspace, cls._get_chars_for_ranges())) + return "".join(filterfalse(str.isspace, cls._chars_for_ranges)) @_lazyclassproperty def alphas(cls): "all alphabetic characters in this range" - return "".join(filter(str.isalpha, cls._get_chars_for_ranges())) + return "".join(filter(str.isalpha, cls._chars_for_ranges)) @_lazyclassproperty def nums(cls): "all numeric digit characters in this range" - return "".join(filter(str.isdigit, cls._get_chars_for_ranges())) + return "".join(filter(str.isdigit, cls._chars_for_ranges)) @_lazyclassproperty def alphanums(cls): @@ -76,7 +86,7 @@ class unicode_set: def identchars(cls): "all characters in this range that are valid identifier characters, plus underscore '_'" return ( - "".join(filter(str.isidentifier, cls._get_chars_for_ranges())) + "".join(filter(str.isidentifier, cls._chars_for_ranges)) + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzªµº" + "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ" + "_" @@ -96,30 +106,30 @@ class pyparsing_unicode(unicode_set): A namespace class for defining common language unicode_sets. 
""" - _ranges: List[Tuple[int, ...]] = [(32, sys.maxunicode)] + _ranges: UnicodeRangeList = [(32, sys.maxunicode)] class Latin1(unicode_set): "Unicode set for Latin-1 Unicode Character Range" - _ranges: List[Tuple[int, ...]] = [ + _ranges: UnicodeRangeList = [ (0x0020, 0x007E), (0x00A0, 0x00FF), ] class LatinA(unicode_set): "Unicode set for Latin-A Unicode Character Range" - _ranges: List[Tuple[int, ...]] = [ + _ranges: UnicodeRangeList = [ (0x0100, 0x017F), ] class LatinB(unicode_set): "Unicode set for Latin-B Unicode Character Range" - _ranges: List[Tuple[int, ...]] = [ + _ranges: UnicodeRangeList = [ (0x0180, 0x024F), ] class Greek(unicode_set): "Unicode set for Greek Unicode Character Ranges" - _ranges: List[Tuple[int, ...]] = [ + _ranges: UnicodeRangeList = [ (0x0342, 0x0345), (0x0370, 0x0377), (0x037A, 0x037F), @@ -159,7 +169,7 @@ class pyparsing_unicode(unicode_set): class Cyrillic(unicode_set): "Unicode set for Cyrillic Unicode Character Range" - _ranges: List[Tuple[int, ...]] = [ + _ranges: UnicodeRangeList = [ (0x0400, 0x052F), (0x1C80, 0x1C88), (0x1D2B,), @@ -172,7 +182,7 @@ class pyparsing_unicode(unicode_set): class Chinese(unicode_set): "Unicode set for Chinese Unicode Character Range" - _ranges: List[Tuple[int, ...]] = [ + _ranges: UnicodeRangeList = [ (0x2E80, 0x2E99), (0x2E9B, 0x2EF3), (0x31C0, 0x31E3), @@ -195,18 +205,18 @@ class pyparsing_unicode(unicode_set): class Japanese(unicode_set): "Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges" - _ranges: List[Tuple[int, ...]] = [] + _ranges: UnicodeRangeList = [] class Kanji(unicode_set): "Unicode set for Kanji Unicode Character Range" - _ranges: List[Tuple[int, ...]] = [ + _ranges: UnicodeRangeList = [ (0x4E00, 0x9FBF), (0x3000, 0x303F), ] class Hiragana(unicode_set): "Unicode set for Hiragana Unicode Character Range" - _ranges: List[Tuple[int, ...]] = [ + _ranges: UnicodeRangeList = [ (0x3041, 0x3096), (0x3099, 0x30A0), (0x30FC,), @@ -218,7 +228,7 @@ 
class pyparsing_unicode(unicode_set): class Katakana(unicode_set): "Unicode set for Katakana Unicode Character Range" - _ranges: List[Tuple[int, ...]] = [ + _ranges: UnicodeRangeList = [ (0x3099, 0x309C), (0x30A0, 0x30FF), (0x31F0, 0x31FF), @@ -232,7 +242,7 @@ class pyparsing_unicode(unicode_set): class Hangul(unicode_set): "Unicode set for Hangul (Korean) Unicode Character Range" - _ranges: List[Tuple[int, ...]] = [ + _ranges: UnicodeRangeList = [ (0x1100, 0x11FF), (0x302E, 0x302F), (0x3131, 0x318E), @@ -258,11 +268,11 @@ class pyparsing_unicode(unicode_set): class Thai(unicode_set): "Unicode set for Thai Unicode Character Range" - _ranges: List[Tuple[int, ...]] = [(0x0E01, 0x0E3A), (0x0E3F, 0x0E5B)] + _ranges: UnicodeRangeList = [(0x0E01, 0x0E3A), (0x0E3F, 0x0E5B)] class Arabic(unicode_set): "Unicode set for Arabic Unicode Character Range" - _ranges: List[Tuple[int, ...]] = [ + _ranges: UnicodeRangeList = [ (0x0600, 0x061B), (0x061E, 0x06FF), (0x0700, 0x077F), @@ -270,7 +280,7 @@ class pyparsing_unicode(unicode_set): class Hebrew(unicode_set): "Unicode set for Hebrew Unicode Character Range" - _ranges: List[Tuple[int, ...]] = [ + _ranges: UnicodeRangeList = [ (0x0591, 0x05C7), (0x05D0, 0x05EA), (0x05EF, 0x05F4), @@ -284,7 +294,7 @@ class pyparsing_unicode(unicode_set): class Devanagari(unicode_set): "Unicode set for Devanagari Unicode Character Range" - _ranges: List[Tuple[int, ...]] = [(0x0900, 0x097F), (0xA8E0, 0xA8FF)] + _ranges: UnicodeRangeList = [(0x0900, 0x097F), (0xA8E0, 0xA8FF)] pyparsing_unicode.Japanese._ranges = ( |
