summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ptmcg <ptmcg@austin.rr.com> 2021-09-27 05:47:27 -0500
committer ptmcg <ptmcg@austin.rr.com> 2021-09-27 05:47:27 -0500
commit 4e306edc905c8596436220f72dd2ce04b274bb29 (patch)
tree 9eecdafef340bf8e75d823939b9554b550b513bb
parent d9b2b46f98e262661a2236b62f89bbadb42edda6 (diff)
download pyparsing-git-4e306edc905c8596436220f72dd2ce04b274bb29.tar.gz
Fix type annotation for ranges in unicode_sets; make _get_chars_for_ranges a lazyclassproperty
-rw-r--r-- pyparsing/__init__.py 4
-rw-r--r-- pyparsing/unicode.py 62
2 files changed, 38 insertions, 28 deletions
diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py
index 45de791..7e2a144 100644
--- a/pyparsing/__init__.py
+++ b/pyparsing/__init__.py
@@ -105,7 +105,7 @@ __version__ = "{}.{}.{}".format(*__version_info__[:3]) + (
),
"",
)[__version_info__.release_level == "final"]
-__version_time__ = "25 September 2021 17:02 UTC"
+__version_time__ = "27 September 2021 10:38 UTC"
__versionTime__ = __version_time__
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
@@ -119,7 +119,7 @@ from .core import _builtin_exprs as core_builtin_exprs
from .helpers import *
from .helpers import _builtin_exprs as helper_builtin_exprs
-from .unicode import unicode_set, pyparsing_unicode as unicode
+from .unicode import unicode_set, UnicodeRangeList, pyparsing_unicode as unicode
from .testing import pyparsing_test as testing
from .common import (
pyparsing_common as common,
diff --git a/pyparsing/unicode.py b/pyparsing/unicode.py
index cbf6865..9ee6710 100644
--- a/pyparsing/unicode.py
+++ b/pyparsing/unicode.py
@@ -2,7 +2,7 @@
import sys
from itertools import filterfalse
-from typing import List, Tuple
+from typing import List, Tuple, Union
class _lazyclassproperty:
@@ -25,14 +25,24 @@ class _lazyclassproperty:
return cls._intern[attrname]
+UnicodeRangeList = List[Union[Tuple[int, int], Tuple[int]]]
+
+
class unicode_set:
"""
A set of Unicode characters, for language-specific strings for
``alphas``, ``nums``, ``alphanums``, and ``printables``.
A unicode_set is defined by a list of ranges in the Unicode character
- set, in a class attribute ``_ranges``, such as::
+ set, in a class attribute ``_ranges``. Ranges can be specified using
+ 2-tuples or a 1-tuple, such as::
- _ranges = [(0x0020, 0x007e), (0x00a0, 0x00ff),]
+ _ranges = [
+ (0x0020, 0x007e),
+ (0x00a0, 0x00ff),
+ (0x0100,),
+ ]
+
+ Ranges are left- and right-inclusive. A 1-tuple of (x,) is treated as (x, x).
A unicode set can also be defined using multiple inheritance of other unicode sets::
@@ -40,10 +50,10 @@ class unicode_set:
pass
"""
- _ranges: List[Tuple[int, ...]] = []
+ _ranges: UnicodeRangeList = []
- @classmethod
- def _get_chars_for_ranges(cls):
+ @_lazyclassproperty
+ def _chars_for_ranges(cls):
ret = []
for cc in cls.__mro__:
if cc is unicode_set:
@@ -55,17 +65,17 @@ class unicode_set:
@_lazyclassproperty
def printables(cls):
"all non-whitespace characters in this range"
- return "".join(filterfalse(str.isspace, cls._get_chars_for_ranges()))
+ return "".join(filterfalse(str.isspace, cls._chars_for_ranges))
@_lazyclassproperty
def alphas(cls):
"all alphabetic characters in this range"
- return "".join(filter(str.isalpha, cls._get_chars_for_ranges()))
+ return "".join(filter(str.isalpha, cls._chars_for_ranges))
@_lazyclassproperty
def nums(cls):
"all numeric digit characters in this range"
- return "".join(filter(str.isdigit, cls._get_chars_for_ranges()))
+ return "".join(filter(str.isdigit, cls._chars_for_ranges))
@_lazyclassproperty
def alphanums(cls):
@@ -76,7 +86,7 @@ class unicode_set:
def identchars(cls):
"all characters in this range that are valid identifier characters, plus underscore '_'"
return (
- "".join(filter(str.isidentifier, cls._get_chars_for_ranges()))
+ "".join(filter(str.isidentifier, cls._chars_for_ranges))
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzªµº"
+ "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ"
+ "_"
@@ -96,30 +106,30 @@ class pyparsing_unicode(unicode_set):
A namespace class for defining common language unicode_sets.
"""
- _ranges: List[Tuple[int, ...]] = [(32, sys.maxunicode)]
+ _ranges: UnicodeRangeList = [(32, sys.maxunicode)]
class Latin1(unicode_set):
"Unicode set for Latin-1 Unicode Character Range"
- _ranges: List[Tuple[int, ...]] = [
+ _ranges: UnicodeRangeList = [
(0x0020, 0x007E),
(0x00A0, 0x00FF),
]
class LatinA(unicode_set):
"Unicode set for Latin-A Unicode Character Range"
- _ranges: List[Tuple[int, ...]] = [
+ _ranges: UnicodeRangeList = [
(0x0100, 0x017F),
]
class LatinB(unicode_set):
"Unicode set for Latin-B Unicode Character Range"
- _ranges: List[Tuple[int, ...]] = [
+ _ranges: UnicodeRangeList = [
(0x0180, 0x024F),
]
class Greek(unicode_set):
"Unicode set for Greek Unicode Character Ranges"
- _ranges: List[Tuple[int, ...]] = [
+ _ranges: UnicodeRangeList = [
(0x0342, 0x0345),
(0x0370, 0x0377),
(0x037A, 0x037F),
@@ -159,7 +169,7 @@ class pyparsing_unicode(unicode_set):
class Cyrillic(unicode_set):
"Unicode set for Cyrillic Unicode Character Range"
- _ranges: List[Tuple[int, ...]] = [
+ _ranges: UnicodeRangeList = [
(0x0400, 0x052F),
(0x1C80, 0x1C88),
(0x1D2B,),
@@ -172,7 +182,7 @@ class pyparsing_unicode(unicode_set):
class Chinese(unicode_set):
"Unicode set for Chinese Unicode Character Range"
- _ranges: List[Tuple[int, ...]] = [
+ _ranges: UnicodeRangeList = [
(0x2E80, 0x2E99),
(0x2E9B, 0x2EF3),
(0x31C0, 0x31E3),
@@ -195,18 +205,18 @@ class pyparsing_unicode(unicode_set):
class Japanese(unicode_set):
"Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges"
- _ranges: List[Tuple[int, ...]] = []
+ _ranges: UnicodeRangeList = []
class Kanji(unicode_set):
"Unicode set for Kanji Unicode Character Range"
- _ranges: List[Tuple[int, ...]] = [
+ _ranges: UnicodeRangeList = [
(0x4E00, 0x9FBF),
(0x3000, 0x303F),
]
class Hiragana(unicode_set):
"Unicode set for Hiragana Unicode Character Range"
- _ranges: List[Tuple[int, ...]] = [
+ _ranges: UnicodeRangeList = [
(0x3041, 0x3096),
(0x3099, 0x30A0),
(0x30FC,),
@@ -218,7 +228,7 @@ class pyparsing_unicode(unicode_set):
class Katakana(unicode_set):
"Unicode set for Katakana Unicode Character Range"
- _ranges: List[Tuple[int, ...]] = [
+ _ranges: UnicodeRangeList = [
(0x3099, 0x309C),
(0x30A0, 0x30FF),
(0x31F0, 0x31FF),
@@ -232,7 +242,7 @@ class pyparsing_unicode(unicode_set):
class Hangul(unicode_set):
"Unicode set for Hangul (Korean) Unicode Character Range"
- _ranges: List[Tuple[int, ...]] = [
+ _ranges: UnicodeRangeList = [
(0x1100, 0x11FF),
(0x302E, 0x302F),
(0x3131, 0x318E),
@@ -258,11 +268,11 @@ class pyparsing_unicode(unicode_set):
class Thai(unicode_set):
"Unicode set for Thai Unicode Character Range"
- _ranges: List[Tuple[int, ...]] = [(0x0E01, 0x0E3A), (0x0E3F, 0x0E5B)]
+ _ranges: UnicodeRangeList = [(0x0E01, 0x0E3A), (0x0E3F, 0x0E5B)]
class Arabic(unicode_set):
"Unicode set for Arabic Unicode Character Range"
- _ranges: List[Tuple[int, ...]] = [
+ _ranges: UnicodeRangeList = [
(0x0600, 0x061B),
(0x061E, 0x06FF),
(0x0700, 0x077F),
@@ -270,7 +280,7 @@ class pyparsing_unicode(unicode_set):
class Hebrew(unicode_set):
"Unicode set for Hebrew Unicode Character Range"
- _ranges: List[Tuple[int, ...]] = [
+ _ranges: UnicodeRangeList = [
(0x0591, 0x05C7),
(0x05D0, 0x05EA),
(0x05EF, 0x05F4),
@@ -284,7 +294,7 @@ class pyparsing_unicode(unicode_set):
class Devanagari(unicode_set):
"Unicode set for Devanagari Unicode Character Range"
- _ranges: List[Tuple[int, ...]] = [(0x0900, 0x097F), (0xA8E0, 0xA8FF)]
+ _ranges: UnicodeRangeList = [(0x0900, 0x097F), (0xA8E0, 0xA8FF)]
pyparsing_unicode.Japanese._ranges = (